Exemplo n.º 1
0
 def test_pipeline(self):
     # Fit a Pipeline (StandardScaler, SelectKBest with k=2, LinearSVC) on
     # Iris and compare what Predict outputs with the Iris targets.
     modules = [
         ('datasets|Iris', identifier, []),
         ('preprocessing|StandardScaler', identifier, []),
         ('feature_selection|SelectKBest', identifier,
          [('k', [('Integer', '2')])]),
         ('classifiers|LinearSVC', identifier, []),
         ('Pipeline', identifier, []),
         ('Predict', identifier, []),
     ]
     # The integers in each connection are positions in `modules`.
     connections = [
         # feed data to pipeline
         (0, 'data', 4, 'training_data'),
         (0, 'target', 4, 'training_target'),
         # put models in pipeline
         (1, 'model', 4, 'model1'),
         (2, 'model', 4, 'model2'),
         (3, 'model', 4, 'model3'),
         # predict using pipeline
         (4, 'model', 5, 'model'),
         (0, 'data', 5, 'data'),
     ]
     with intercept_results(Iris, 'target',
                            Predict, 'prediction') as (targets, predictions):
         # execute() has to return something falsy for the run to pass
         self.assertFalse(execute(modules, connections))
         expected = np.array(targets[0])
         predicted = np.array(predictions[0])
         self.assertEqual(expected.shape, predicted.shape)
         accuracy = np.mean(expected == predicted)
         self.assertTrue(accuracy > .8)
Exemplo n.º 2
0
 def test_train_test_split(self):
     # Split Iris with test_size=50 and verify the shape of each of the
     # four TrainTestSplit outputs.
     modules = [
         ('datasets|Iris', identifier, []),
         ('cross-validation|TrainTestSplit', identifier,
          [('test_size', [('Integer', '50')])]),
     ]
     connections = [
         (0, 'data', 1, 'data'),
         (0, 'target', 1, 'target'),
     ]
     with intercept_results(
             TrainTestSplit, 'training_data',
             TrainTestSplit, 'training_target',
             TrainTestSplit, 'test_data',
             TrainTestSplit, 'test_target') as captured:
         train_data, train_target, test_data, test_target = captured
         self.assertFalse(execute(modules, connections))
     train_data = np.vstack(train_data)
     test_data = np.vstack(test_data)
     train_target = np.hstack(train_target)
     test_target = np.hstack(test_target)
     # 150 Iris samples: 100 for training, 50 held out
     self.assertEqual(train_data.shape, (100, 4))
     self.assertEqual(test_data.shape, (50, 4))
     self.assertEqual(train_target.shape, (100,))
     self.assertEqual(test_target.shape, (50,))
Exemplo n.º 3
0
 def test_pipeline(self):
     # Train a three-step Pipeline on Iris, predict on the same data and
     # require better than 80% agreement with the true targets.
     scaler = ('preprocessing|StandardScaler', identifier, [])
     selector = ('feature_selection|SelectKBest', identifier,
                 [('k', [('Integer', '2')])])
     classifier = ('classifiers|LinearSVC', identifier, [])
     modules = [
         ('datasets|Iris', identifier, []),
         scaler,
         selector,
         classifier,
         ('Pipeline', identifier, []),
         ('Predict', identifier, []),
     ]
     # The integers in each connection are positions in `modules`.
     training_inputs = [
         (0, 'data', 4, 'training_data'),
         (0, 'target', 4, 'training_target'),
     ]
     pipeline_steps = [
         (1, 'model', 4, 'model1'),
         (2, 'model', 4, 'model2'),
         (3, 'model', 4, 'model3'),
     ]
     prediction_inputs = [
         (4, 'model', 5, 'model'),
         (0, 'data', 5, 'data'),
     ]
     connections = training_inputs + pipeline_steps + prediction_inputs
     with intercept_results(Iris, 'target',
                            Predict, 'prediction') as (targets, predictions):
         outcome = execute(modules, connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
         truth = np.array(targets[0])
         guess = np.array(predictions[0])
         self.assertEqual(truth.shape, guess.shape)
         self.assertTrue(np.mean(truth == guess) > .8)
Exemplo n.º 4
0
 def test_iris(self):
     # Run the Iris dataset module on its own and check the dimensions of
     # its 'data' and 'target' outputs.
     modules = [('datasets|Iris', identifier, [])]
     with intercept_results(Iris, 'data',
                            Iris, 'target') as (data_parts, target_parts):
         self.assertFalse(execute(modules))
     features = np.vstack(data_parts)
     labels = np.hstack(target_parts)
     self.assertEqual(features.shape, (150, 4))
     self.assertEqual(labels.shape, (150,))
Exemplo n.º 5
0
 def test_digits(self):
     # Run the Digits dataset module on its own and check the dimensions
     # of its 'data' and 'target' outputs.
     modules = [('datasets|Digits', identifier, [])]
     with intercept_results(Digits, 'data',
                            Digits, 'target') as (data_parts, target_parts):
         self.assertFalse(execute(modules))
     features = np.vstack(data_parts)
     labels = np.hstack(target_parts)
     self.assertEqual(features.shape, (1797, 64))
     self.assertEqual(labels.shape, (1797,))
Exemplo n.º 6
0
 def test_iris(self):
     # The Iris module must load: 150 rows of 4 features, 150 targets.
     iris_only = [('datasets|Iris', identifier, [])]
     with intercept_results(Iris, 'data', Iris, 'target') as (xs, ys):
         outcome = execute(iris_only)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     stacked_data = np.vstack(xs)
     stacked_target = np.hstack(ys)
     self.assertEqual(stacked_data.shape, (150, 4))
     self.assertEqual(stacked_target.shape, (150,))
Exemplo n.º 7
0
 def test_digits(self):
     # The Digits module must load: 1797 rows of 64 features, 1797 targets.
     digits_only = [('datasets|Digits', identifier, [])]
     with intercept_results(Digits, 'data', Digits, 'target') as (xs, ys):
         outcome = execute(digits_only)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     stacked_data = np.vstack(xs)
     stacked_target = np.hstack(ys)
     self.assertEqual(stacked_data.shape, (1797, 64))
     self.assertEqual(stacked_target.shape, (1797,))
Exemplo n.º 8
0
    def test_object(self):
        """Read a JSON object whose rows are either objects or lists.
        """
        import numpy

        # Each case pairs a JSON document with a flag telling whether its
        # rows are objects (so column names are available) or plain lists.
        cases = [
            ("""
            {
                "John": {"lastname": "Smith", "age": 25, "city": "New York"},
                "Lara": {"lastname": "Croft", "age": 21, "city": "Nashville"},
                "Michael": {"lastname": "Buck", "age": 78, "city": "Goodman"}
            }
            """, True),
            ("""
            {
                "John": ["Smith", 25, "New York"],
                "Lara": ["Croft", 21, "Nashville"],
                "Michael": ["Buck", 78, "Goodman"]
            }
            """, False),
        ]

        for document, named in cases:
            # WriteFile turns the JSON text into a file for JSONObject
            modules = [
                ('WriteFile', 'org.vistrails.vistrails.basic',
                 [('in_value', [('String', document)])]),
                ('read|JSONObject', identifier, []),
            ]
            connections = [(0, 'out_value', 1, 'file')]
            with intercept_results(JSONObject, 'value', 'column_count',
                                   'column_names') as results:
                self.assertFalse(execute(modules, connections))
            # 'value' and 'column_count' must each hold exactly one entry
            self.assertTrue(all(len(port) == 1 for port in results[:2]))
            (table,), (column_count,), column_names = results
            self.assertEqual(column_count, 4)

            if not named:
                self.assertEqual(column_names, [])
                self.assertIsNone(table.names)
                # without names, fall back to fixed column positions
                city_col, age_col = 3, 2
            else:
                self.assertEqual(column_names, [table.names])
                self.assertEqual(table.names[0], 'key')
                self.assertEqual(set(table.names[1:]),
                                 {'lastname', 'age', 'city'})
                city_col = table.names.index('city')
                age_col = table.names.index('age')
            cities = set(table.get_column(city_col))
            self.assertEqual(cities, {"New York", "Nashville", "Goodman"})
            ages = table.get_column(age_col, True)
            self.assertIsInstance(ages, numpy.ndarray)
            self.assertEqual(set(ages), {21, 25, 78})
Exemplo n.º 9
0
    def test_object(self):
        """Check JSONObject on a top-level object with object or list rows.
        """
        import numpy

        # (JSON text, True when the rows are objects and so provide names)
        json_files = [
            ("""
            {
                "John": {"lastname": "Smith", "age": 25, "city": "New York"},
                "Lara": {"lastname": "Croft", "age": 21, "city": "Nashville"},
                "Michael": {"lastname": "Buck", "age": 78, "city": "Goodman"}
            }
            """, True),
            ("""
            {
                "John": ["Smith", 25, "New York"],
                "Lara": ["Croft", 21, "Nashville"],
                "Michael": ["Buck", 78, "Goodman"]
            }
            """, False),
        ]
        expected_cities = {"New York", "Nashville", "Goodman"}
        expected_ages = {21, 25, 78}

        for json_text, rows_are_objects in json_files:
            writer = ('WriteFile', 'org.vistrails.vistrails.basic',
                      [('in_value', [('String', json_text)])])
            reader = ('read|JSONObject', identifier, [])
            with intercept_results(JSONObject, 'value', 'column_count',
                                   'column_names') as results:
                outcome = execute([writer, reader],
                                  [(0, 'out_value', 1, 'file')])
                # a falsy return value is what counts as a successful run
                self.assertFalse(outcome)
            # the first two intercepted ports must each have one entry
            self.assertTrue(all(len(entry) == 1 for entry in results[:2]))
            (table,), (count,), names = results
            self.assertEqual(count, 4)

            if rows_are_objects:
                self.assertEqual(names, [table.names])
                self.assertEqual(table.names[0], 'key')
                self.assertEqual(set(table.names[1:]),
                                 {'lastname', 'age', 'city'})
                city_index = table.names.index('city')
                age_index = table.names.index('age')
            else:
                self.assertEqual(names, [])
                self.assertIsNone(table.names)
                # no names to look up, so use fixed column positions
                city_index = 3
                age_index = 2
            self.assertEqual(set(table.get_column(city_index)),
                             expected_cities)
            age_column = table.get_column(age_index, True)
            self.assertIsInstance(age_column, numpy.ndarray)
            self.assertEqual(set(age_column), expected_ages)
Exemplo n.º 10
0
    def test_classifier_training_predict(self):
        # Train LinearSVC on an Iris split, predict on the held-out part
        # and score it twice: once with the default metric, once with 'f1'.
        modules = [
            ('datasets|Iris', identifier, []),
            ('cross-validation|TrainTestSplit', identifier,
             [('test_size', [('Integer', '50')])]),
            ('classifiers|LinearSVC', identifier, []),
            ('Predict', identifier, []),
            ('Score', identifier, []),
            # use custom metric
            ('Score', identifier, [('metric', [('String', 'f1')])]),
        ]
        # The integers in each connection are positions in `modules`.
        connections = [
            # train test split
            (0, 'data', 1, 'data'),
            (0, 'target', 1, 'target'),
            # fit LinearSVC on training data
            (1, 'training_data', 2, 'training_data'),
            (1, 'training_target', 2, 'training_target'),
            # predict on test data
            (2, 'model', 3, 'model'),
            (1, 'test_data', 3, 'data'),
            # score test data
            (2, 'model', 4, 'model'),
            (1, 'test_data', 4, 'data'),
            (1, 'test_target', 4, 'target'),
            # f1 scorer
            (2, 'model', 5, 'model'),
            (1, 'test_data', 5, 'data'),
            (1, 'test_target', 5, 'target'),
        ]
        with intercept_results(Predict, 'prediction',
                               Predict, 'decision_function',
                               TrainTestSplit, 'test_target',
                               Score, 'score') as results:
            predictions, decisions, targets, scores = results
            self.assertFalse(execute(modules, connections))
        y_pred = np.hstack(predictions)
        decision_function = np.vstack(decisions)
        y_test = np.hstack(targets)
        # two Score modules ran, so two scores were intercepted
        score_acc, score_f1 = scores
        self.assertEqual(y_pred.shape, (50,))
        self.assertTrue(np.all(np.unique(y_pred) == np.array([0, 1, 2])))
        self.assertEqual(decision_function.shape, (50, 3))
        accuracy = np.mean(y_test == y_pred)
        # some accuracy
        self.assertTrue(accuracy > .8)
        # the default score is the accuracy
        self.assertEqual(accuracy, score_acc)
        # the custom score matches f1_score computed directly
        self.assertEqual(f1_score(y_test, y_pred), score_f1)
Exemplo n.º 11
0
 def test_classifier_training_predict(self):
     # Fit LinearSVC on the training part of an Iris split, then check the
     # predictions, the decision function and both Score outputs.
     split = ('cross-validation|TrainTestSplit', identifier,
              [('test_size', [('Integer', '50')])])
     # use custom metric
     f1_scorer = ('Score', identifier, [('metric', [('String', 'f1')])])
     modules = [
         ('datasets|Iris', identifier, []),
         split,
         ('classifiers|LinearSVC', identifier, []),
         ('Predict', identifier, []),
         ('Score', identifier, []),
         f1_scorer,
     ]
     # The integers in each connection are positions in `modules`.
     connections = [
         # train test split
         (0, 'data', 1, 'data'),
         (0, 'target', 1, 'target'),
         # fit LinearSVC on training data
         (1, 'training_data', 2, 'training_data'),
         (1, 'training_target', 2, 'training_target'),
         # predict on test data
         (2, 'model', 3, 'model'),
         (1, 'test_data', 3, 'data'),
         # score test data
         (2, 'model', 4, 'model'),
         (1, 'test_data', 4, 'data'),
         (1, 'test_target', 4, 'target'),
         # f1 scorer
         (2, 'model', 5, 'model'),
         (1, 'test_data', 5, 'data'),
         (1, 'test_target', 5, 'target'),
     ]
     with intercept_results(Predict, 'prediction',
                            Predict, 'decision_function',
                            TrainTestSplit, 'test_target',
                            Score, 'score') as results:
         raw_pred, raw_decision, raw_test, raw_scores = results
         outcome = execute(modules, connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     y_pred = np.hstack(raw_pred)
     decision_function = np.vstack(raw_decision)
     y_test = np.hstack(raw_test)
     # unpack the results from the two scorers
     score_acc, score_f1 = raw_scores
     self.assertEqual(y_pred.shape, (50,))
     predicted_classes = np.unique(y_pred)
     self.assertTrue(np.all(predicted_classes == np.array([0, 1, 2])))
     self.assertEqual(decision_function.shape, (50, 3))
     # some accuracy
     self.assertTrue(np.mean(y_test == y_pred) > .8)
     # score is actually the accuracy
     self.assertEqual(np.mean(y_test == y_pred), score_acc)
     # f1 score is actually f1 score
     self.assertEqual(f1_score(y_test, y_pred), score_f1)
Exemplo n.º 12
0
 def test_transformer_unsupervised_transform(self):
     # PCA with n_components=2, fitted on the Iris data only, must give a
     # (150, 2) result when applied back to that data through Transform.
     modules = [
         ('datasets|Iris', identifier, []),
         ('decomposition|PCA', identifier,
          [('n_components', [('Integer', '2')])]),
         ('Transform', identifier, []),
     ]
     connections = [
         (0, 'data', 1, 'training_data'),
         (1, 'model', 2, 'model'),
         (0, 'data', 2, 'data'),
     ]
     with intercept_results(Transform, 'transformed_data') as (chunks,):
         self.assertFalse(execute(modules, connections))
     projected = np.vstack(chunks)
     self.assertEqual(projected.shape, (150, 2))
Exemplo n.º 13
0
 def test_nested_cross_validation(self):
     # Feed a GridSearchCV wrapping a DecisionTreeClassifier to
     # CrossValScore on Iris and check the scores it reports.
     grid = ('parameters', [('Dictionary', "{'max_depth': [1, 2, 3, 4]}")])
     modules = [
         ('datasets|Iris', identifier, []),
         ('classifiers|DecisionTreeClassifier', identifier, []),
         ('GridSearchCV', identifier, [grid]),
         ('cross-validation|CrossValScore', identifier, []),
     ]
     connections = [
         (0, 'data', 3, 'data'),
         (0, 'target', 3, 'target'),
         (1, 'model', 2, 'model'),
         (2, 'model', 3, 'model'),
     ]
     with intercept_results(CrossValScore, 'scores') as (scores,):
         self.assertFalse(execute(modules, connections))
     run_scores = scores[0]
     self.assertEqual(len(run_scores), 3)
     self.assertTrue(np.mean(run_scores) > .8)
Exemplo n.º 14
0
 def test_cross_val_score(self):
     # Check that CrossValScore with LinearSVC on Iris reports three
     # scores whose mean is above 0.8.
     modules = [
         ('datasets|Iris', identifier, []),
         ('classifiers|LinearSVC', identifier, []),
         ('cross-validation|CrossValScore', identifier, []),
     ]
     connections = [
         (0, 'data', 2, 'data'),
         (0, 'target', 2, 'target'),
         (1, 'model', 2, 'model'),
     ]
     with intercept_results(CrossValScore, 'scores') as (captured,):
         self.assertFalse(execute(modules, connections))
     all_scores = np.hstack(captured)
     self.assertEqual(all_scores.shape, (3,))
     self.assertTrue(np.mean(all_scores) > .8)
Exemplo n.º 15
0
 def test_manifold_learning(self):
     # Isomap trained on the Iris data must output a (150, 2) embedding.
     isomap_class = class_by_name("Isomap")
     modules = [
         ('datasets|Iris', identifier, []),
         ('manifold|Isomap', identifier, []),
     ]
     connections = [(0, 'data', 1, 'training_data')]
     with intercept_results(isomap_class,
                            'transformed_data') as (chunks,):
         self.assertFalse(execute(modules, connections))
     embedding = np.vstack(chunks)
     self.assertEqual(embedding.shape, (150, 2))
Exemplo n.º 16
0
    def test_list(self):
        """Read a JSON list whose rows are either objects or lists.
        """
        import numpy

        # First document: rows are objects (named columns).
        # Second document: rows are plain lists (no names).
        documents = [
            """
            [
                {"firstname": "John", "lastname": "Smith", "age": 25},
                {"firstname": "Lara", "lastname": "Croft", "age": 21},
                {"firstname": "Michael", "lastname": "Buck", "age": 78}
            ]
            """,
            """
            [[2, 7, 6],
             [9, 5, 1],
             [4, 3, 8]]
            """,
            ]

        for position, document in enumerate(documents):
            # WriteFile turns the JSON text into a file for JSONList
            modules = [
                ('WriteFile', 'org.vistrails.vistrails.basic',
                 [('in_value', [('String', document)])]),
                ('read|JSONList', identifier, []),
            ]
            connections = [(0, 'out_value', 1, 'file')]
            with intercept_results(JSONList, 'value', 'column_count',
                                   'column_names') as results:
                self.assertFalse(execute(modules, connections))
            # 'value' and 'column_count' must each hold exactly one entry
            self.assertTrue(all(len(port) == 1 for port in results[:2]))
            (table,), (column_count,), column_names = results
            self.assertEqual(column_count, 3)

            if position != 0:
                self.assertEqual(column_names, [])
                self.assertIsNone(table.names)
                # columns are the transpose of the row lists above
                columns = [table.get_column(index) for index in xrange(3)]
                self.assertEqual(columns,
                                 [[2, 9, 4],
                                  [7, 5, 3],
                                  [6, 1, 8]])
            else:
                self.assertEqual(column_names, [table.names])
                self.assertEqual(set(table.names),
                                 {'firstname', 'lastname', 'age'})
                firstnames = set(table.get_column_by_name('firstname'))
                self.assertEqual(firstnames, {"John", "Lara", "Michael"})
                ages = table.get_column_by_name('age', True)
                self.assertIsInstance(ages, numpy.ndarray)
                self.assertEqual(set(ages), {21, 25, 78})
Exemplo n.º 17
0
    def test_list(self):
        """Check JSONList on a top-level list with object or list rows.
        """
        import numpy

        # Only the first document (object rows) provides column names.
        json_files = [
            """
            [
                {"firstname": "John", "lastname": "Smith", "age": 25},
                {"firstname": "Lara", "lastname": "Croft", "age": 21},
                {"firstname": "Michael", "lastname": "Buck", "age": 78}
            ]
            """,
            """
            [[2, 7, 6],
             [9, 5, 1],
             [4, 3, 8]]
            """,
            ]

        for nb, json_text in enumerate(json_files):
            rows_are_objects = nb == 0
            writer = ('WriteFile', 'org.vistrails.vistrails.basic',
                      [('in_value', [('String', json_text)])])
            reader = ('read|JSONList', identifier, [])
            with intercept_results(JSONList, 'value', 'column_count',
                                   'column_names') as results:
                outcome = execute([writer, reader],
                                  [(0, 'out_value', 1, 'file')])
                # a falsy return value is what counts as a successful run
                self.assertFalse(outcome)
            # the first two intercepted ports must each have one entry
            self.assertTrue(all(len(entry) == 1 for entry in results[:2]))
            (table,), (count,), names = results
            self.assertEqual(count, 3)

            if rows_are_objects:
                self.assertEqual(names, [table.names])
                self.assertEqual(set(table.names),
                                 {'firstname', 'lastname', 'age'})
                self.assertEqual(set(table.get_column_by_name('firstname')),
                                 {"John", "Lara", "Michael"})
                age_column = table.get_column_by_name('age', True)
                self.assertIsInstance(age_column, numpy.ndarray)
                self.assertEqual(set(age_column), {21, 25, 78})
            else:
                self.assertEqual(names, [])
                self.assertIsNone(table.names)
                # each column gathers one position from every row list
                by_column = [table.get_column(idx) for idx in xrange(3)]
                expected = [[2, 9, 4],
                            [7, 5, 3],
                            [6, 1, 8]]
                self.assertEqual(by_column, expected)
Exemplo n.º 18
0
 def test_manifold_learning(self):
     # Train Isomap on the Iris data and check that its intercepted
     # 'transformed_data' output stacks to 150 rows by 2 columns.
     modules = [
         ('datasets|Iris', identifier, []),
         ('manifold|Isomap', identifier, []),
     ]
     connections = [(0, 'data', 1, 'training_data')]
     with intercept_results(class_by_name("Isomap"),
                            'transformed_data') as (pieces,):
         outcome = execute(modules, connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     stacked = np.vstack(pieces)
     self.assertEqual(stacked.shape, (150, 2))
Exemplo n.º 19
0
 def test_transformer_supervised_transform(self):
     # SelectKBest with k=2, fitted on Iris data and targets, must leave
     # two columns when applied back to the data through Transform.
     modules = [
         ('datasets|Iris', identifier, []),
         ('feature_selection|SelectKBest', identifier,
          [('k', [('Integer', '2')])]),
         ('Transform', identifier, []),
     ]
     connections = [
         (0, 'data', 1, 'training_data'),
         (0, 'target', 1, 'training_target'),
         (1, 'model', 2, 'model'),
         (0, 'data', 2, 'data'),
     ]
     with intercept_results(Transform, 'transformed_data') as (chunks,):
         self.assertFalse(execute(modules, connections))
     selected = np.vstack(chunks)
     self.assertEqual(selected.shape, (150, 2))
Exemplo n.º 20
0
 def test_gridsearchcv(self):
     # Grid-search max_depth over [1, 2, 3, 4] for a DecisionTreeClassifier
     # on Iris. Expect one score per candidate value, and max_depth == 2
     # reported as the best parameter.
     modules = [
         ('datasets|Iris', identifier, []),
         ('classifiers|DecisionTreeClassifier', identifier, []),
         ('GridSearchCV', identifier, [
             ('parameters', [('Dictionary', "{'max_depth': [1, 2, 3, 4]}")]),
         ]),
     ]
     connections = [
         (0, 'data', 2, 'data'),
         (0, 'target', 2, 'target'),
         (1, 'model', 2, 'model'),
     ]
     with intercept_results(GridSearchCV, 'scores', GridSearchCV,
                            'best_parameters') as (scores, parameters):
         # execute() has to return something falsy for the run to pass
         self.assertFalse(execute(modules, connections))
     self.assertEqual(len(scores[0]), 4)
     # assertEqual, not assertTrue(value, 2): assertTrue takes its second
     # argument as the failure message, so it would only check that
     # max_depth is truthy and never compare it with 2.
     self.assertEqual(parameters[0]['max_depth'], 2)
Exemplo n.º 21
0
 def do_the_test(self, toolname):
     """Run the command-line tool module `toolname` and check its outputs.

     `toolname` is both the key looked up in ``self._tools`` (the module
     class whose ports are intercepted) and the module name passed to
     ``execute``, so the same checks can be applied to any registered
     tool instead of a single hard-coded one.
     """
     parameters = [
         ('f_in', [('File', self.testdir + '/test_1.cltest')]),
         ('chars', [('List', '["a", "b", "c"]')]),
         ('false', [('Boolean', 'False')]),
         ('true', [('Boolean', 'True')]),
         ('nb', [('Integer', '42')]),
         ('stdin', [('String', 'some line\nignored')]),
     ]
     with intercept_results(self._tools[toolname], 'return_code', 'f_out',
                            'stdout') as (return_code, f_out, stdout):
         # execute() has to return something falsy for the run to pass
         self.assertFalse(execute([
             (toolname, 'org.vistrails.vistrails.cltools', parameters),
         ]))
     self.assertEqual(return_code, [0])
     self.assertEqual(f_out, ['ok\nmessage received'])
     self.assertEqual(stdout, ['program output here'])
Exemplo n.º 22
0
 def do_the_test(self, toolname):
     # Run the cltools module named `toolname` with a fixed set of inputs
     # and check its return code, output file content and stdout.
     tool_class = self._tools[toolname]
     input_file = self.testdir + '/test_1.cltest'
     parameters = [
         ('f_in', [('File', input_file)]),
         ('chars', [('List', '["a", "b", "c"]')]),
         ('false', [('Boolean', 'False')]),
         ('true', [('Boolean', 'True')]),
         ('nb', [('Integer', '42')]),
         ('stdin', [('String', 'some line\nignored')]),
     ]
     modules = [(toolname, 'org.vistrails.vistrails.cltools', parameters)]
     with intercept_results(tool_class, 'return_code', 'f_out',
                            'stdout') as (codes, file_outputs, stdouts):
         self.assertFalse(execute(modules))
     self.assertEqual(codes, [0])
     self.assertEqual(file_outputs, ['ok\nmessage received'])
     self.assertEqual(stdouts, ['program output here'])
Exemplo n.º 23
0
 def test_transformer_unsupervised_transform(self):
     # Fit PCA (n_components=2) on the Iris data, apply it through
     # Transform and check the stacked output is 150 rows by 2 columns.
     pca = ('decomposition|PCA', identifier,
            [('n_components', [('Integer', '2')])])
     modules = [
         ('datasets|Iris', identifier, []),
         pca,
         ('Transform', identifier, []),
     ]
     connections = [
         (0, 'data', 1, 'training_data'),
         (1, 'model', 2, 'model'),
         (0, 'data', 2, 'data'),
     ]
     with intercept_results(Transform, 'transformed_data') as (pieces,):
         outcome = execute(modules, connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     stacked = np.vstack(pieces)
     self.assertEqual(stacked.shape, (150, 2))
Exemplo n.º 24
0
 def test_cross_val_score(self):
     # CrossValScore on LinearSVC over Iris must yield exactly three
     # scores, averaging above 0.8.
     dataset = ('datasets|Iris', identifier, [])
     classifier = ('classifiers|LinearSVC', identifier, [])
     scorer = ('cross-validation|CrossValScore', identifier, [])
     connections = [
         (0, 'data', 2, 'data'),
         (0, 'target', 2, 'target'),
         (1, 'model', 2, 'model'),
     ]
     with intercept_results(CrossValScore, 'scores') as (pieces,):
         outcome = execute([dataset, classifier, scorer], connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     flat_scores = np.hstack(pieces)
     self.assertEqual(flat_scores.shape, (3,))
     self.assertTrue(np.mean(flat_scores) > .8)
Exemplo n.º 25
0
 def test_transformer_supervised_transform(self):
     # Fit SelectKBest (k=2) on Iris data and targets, apply it through
     # Transform and check the stacked output is 150 rows by 2 columns.
     selector = ('feature_selection|SelectKBest', identifier,
                 [('k', [('Integer', '2')])])
     modules = [
         ('datasets|Iris', identifier, []),
         selector,
         ('Transform', identifier, []),
     ]
     connections = [
         (0, 'data', 1, 'training_data'),
         (0, 'target', 1, 'training_target'),
         (1, 'model', 2, 'model'),
         (0, 'data', 2, 'data'),
     ]
     with intercept_results(Transform, 'transformed_data') as (pieces,):
         outcome = execute(modules, connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     stacked = np.vstack(pieces)
     self.assertEqual(stacked.shape, (150, 2))
Exemplo n.º 26
0
 def test_nested_cross_validation(self):
     # Use a GridSearchCV over max_depth as the model given to
     # CrossValScore, and check the number and mean of the scores.
     search = ('GridSearchCV', identifier, [
         ('parameters', [('Dictionary', "{'max_depth': [1, 2, 3, 4]}")]),
     ])
     modules = [
         ('datasets|Iris', identifier, []),
         ('classifiers|DecisionTreeClassifier', identifier, []),
         search,
         ('cross-validation|CrossValScore', identifier, []),
     ]
     connections = [
         (0, 'data', 3, 'data'),
         (0, 'target', 3, 'target'),
         (1, 'model', 2, 'model'),
         (2, 'model', 3, 'model'),
     ]
     with intercept_results(CrossValScore, 'scores') as (captured,):
         outcome = execute(modules, connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     first_result = captured[0]
     self.assertEqual(len(first_result), 3)
     self.assertTrue(np.mean(first_result) > .8)
Exemplo n.º 27
0
 def test_train_test_split(self):
     # Check that TrainTestSplit with test_size=50 divides Iris into 100
     # training and 50 test samples, for both data and targets.
     splitter = ('cross-validation|TrainTestSplit', identifier,
                 [('test_size', [('Integer', '50')])])
     modules = [('datasets|Iris', identifier, []), splitter]
     connections = [
         (0, 'data', 1, 'data'),
         (0, 'target', 1, 'target'),
     ]
     with intercept_results(TrainTestSplit, 'training_data',
                            TrainTestSplit, 'training_target',
                            TrainTestSplit, 'test_data',
                            TrainTestSplit, 'test_target') as results:
         raw_X_train, raw_y_train, raw_X_test, raw_y_test = results
         outcome = execute(modules, connections)
         # a falsy return value is what counts as a successful run here
         self.assertFalse(outcome)
     X_train = np.vstack(raw_X_train)
     X_test = np.vstack(raw_X_test)
     y_train = np.hstack(raw_y_train)
     y_test = np.hstack(raw_y_test)
     self.assertEqual(X_train.shape, (100, 4))
     self.assertEqual(X_test.shape, (50, 4))
     self.assertEqual(y_train.shape, (100,))
     self.assertEqual(y_test.shape, (50,))
Exemplo n.º 28
0
 def test_gridsearchcv(self):
     # check that gridsearch on DecisionTreeClassifier does the right
     # number of runs (one score per max_depth candidate) and reports
     # max_depth == 2 as the best parameter.
     with intercept_results(GridSearchCV, 'scores', GridSearchCV,
                            'best_parameters') as (scores, parameters):
         # execute() has to return something falsy for the run to pass
         self.assertFalse(execute(
             [
                 ('datasets|Iris', identifier, []),
                 ('classifiers|DecisionTreeClassifier', identifier, []),
                 ('GridSearchCV', identifier,
                  [('parameters',
                    [('Dictionary', "{'max_depth': [1, 2, 3, 4]}")])]),
             ],
             [
                 (0, 'data', 2, 'data'),
                 (0, 'target', 2, 'target'),
                 (1, 'model', 2, 'model'),
             ]
         ))
     self.assertEqual(len(scores[0]), 4)
     # assertEqual, not assertTrue(value, 2): assertTrue takes its second
     # argument as the failure message, so it would only check that
     # max_depth is truthy and never compare it with 2.
     self.assertEqual(parameters[0]['max_depth'], 2)
Exemplo n.º 29
0
    def test_query_sqlite3(self):
        """Queries a SQLite3 database.

        Creates a temporary SQLite file containing a three-column ``test``
        table with two rows, then executes two DBConnection -> SQLSource
        pipelines against it:

        1. a parameterised INSERT, for which the intercepted 'result' is
           expected to be None;
        2. a parameterised SELECT (``age > 22``), whose intercepted result
           table is checked for its column names, its size and the content
           of its second column.

        The temporary file is removed on a best-effort basis at the end.
        """
        import os
        import sqlite3
        import tempfile
        import urllib2
        from vistrails.tests.utils import execute, intercept_results
        identifier = 'org.vistrails.vistrails.sql'

        # mkstemp returns an open descriptor; only the path is used below
        test_db_fd, test_db = tempfile.mkstemp(suffix='.sqlite3')
        os.close(test_db_fd)
        try:
            conn = sqlite3.connect(test_db)
            try:
                cur = conn.cursor()
                cur.execute('''
                    CREATE TABLE test(name VARCHAR(24) PRIMARY KEY,
                                      lastname VARCHAR(32) NOT NULL,
                                      age INTEGER NOT NULL)
                    ''')
                cur.executemany(
                    '''
                    INSERT INTO test(name, lastname, age)
                    VALUES(:name, :lastname, :age)
                    ''', [{
                        'name': 'John',
                        'lastname': 'Smith',
                        'age': 25
                    }, {
                        'name': 'Lara',
                        'lastname': 'Croft',
                        'age': 21
                    }])
                conn.commit()
            finally:
                # Always release the fixture connection, even when the
                # setup statements fail, so the file can be removed in the
                # outer 'finally'.
                conn.close()

            source = ('''
                    INSERT INTO test(name, lastname, age)
                    VALUES(:name, :lastname, :age)
                    ''')

            with intercept_results(DBConnection, 'connection', SQLSource,
                                   'result') as (connection, table):
                self.assertFalse(
                    execute([
                        ('DBConnection', identifier, [
                            ('protocol', [('String', 'sqlite')]),
                            ('db_name', [('String', test_db)]),
                        ]),
                        ('SQLSource', identifier, [
                            # the SQL text is passed URL-quoted
                            ('source', [('String', urllib2.quote(source))]),
                            ('name', [('String', 'Michael')]),
                            ('lastname', [('String', 'Buck')]),
                            ('age', [('Integer', '78')]),
                        ]),
                    ], [
                        # module 0 'connection' -> module 1 'connection'
                        (0, 'connection', 1, 'connection'),
                    ],
                            # extra input ports on module 1 (SQLSource),
                            # named like the :placeholders in the statement
                            add_port_specs=[
                                (1, 'input', 'name',
                                 'org.vistrails.vistrails.basic:String'),
                                (1, 'input', 'lastname',
                                 'org.vistrails.vistrails.basic:String'),
                                (1, 'input', 'age',
                                 'org.vistrails.vistrails.basic:Integer'),
                            ]))

            self.assertEqual(len(connection), 1)
            connection[0].close()
            self.assertEqual(len(table), 1)
            # the INSERT statement produces no result table
            self.assertIsNone(table[0])

            source = "SELECT name, lastname, age FROM test WHERE age > :age"

            with intercept_results(DBConnection, 'connection', SQLSource,
                                   'result') as (connection, table):
                self.assertFalse(
                    execute([
                        ('DBConnection', identifier, [
                            ('protocol', [('String', 'sqlite')]),
                            ('db_name', [('String', test_db)]),
                        ]),
                        ('SQLSource', identifier, [
                            ('source', [('String', urllib2.quote(source))]),
                            ('age', [('Integer', '22')]),
                        ]),
                    ], [
                        (0, 'connection', 1, 'connection'),
                    ],
                            add_port_specs=[
                                (1, 'input', 'age',
                                 'org.vistrails.vistrails.basic:Integer'),
                            ]))

            self.assertEqual(len(connection), 1)
            connection[0].close()
            self.assertEqual(len(table), 1)
            table, = table
            self.assertEqual(table.names, ['name', 'lastname', 'age'])
            # John (25) and the freshly inserted Michael (78) are older
            # than 22; Lara (21) is filtered out.
            self.assertEqual((table.rows, table.columns), (2, 3))
            self.assertEqual(set(table.get_column(1)), {'Smith', 'Buck'})
        finally:
            try:
                os.remove(test_db)
            except OSError:
                pass  # Oops, we are leaking the file here...
Exemplo n.º 30
0
    def test_query_sqlite3(self):
        """Queries a SQLite3 database.

        A temporary SQLite file is populated with a ``test`` table (columns
        name, lastname, age) holding two rows.  Two pipelines made of a
        DBConnection module feeding an SQLSource module are then executed:
        first an INSERT with three bound parameters, whose intercepted
        'result' must be None, then a SELECT filtered on ``age > 22``,
        whose intercepted table must have the expected column names,
        dimensions and last names.

        The temporary file is deleted afterwards; a failure to delete it is
        ignored.
        """
        import os
        import sqlite3
        import tempfile
        import urllib2
        from vistrails.tests.utils import execute, intercept_results
        identifier = 'org.vistrails.vistrails.sql'

        # Only the path is needed; close the descriptor mkstemp opened.
        test_db_fd, test_db = tempfile.mkstemp(suffix='.sqlite3')
        os.close(test_db_fd)
        try:
            conn = sqlite3.connect(test_db)
            try:
                cur = conn.cursor()
                cur.execute('''
                    CREATE TABLE test(name VARCHAR(24) PRIMARY KEY,
                                      lastname VARCHAR(32) NOT NULL,
                                      age INTEGER NOT NULL)
                    ''')
                cur.executemany('''
                    INSERT INTO test(name, lastname, age)
                    VALUES(:name, :lastname, :age)
                    ''',
                    [{'name': 'John', 'lastname': 'Smith', 'age': 25},
                     {'name': 'Lara', 'lastname': 'Croft', 'age': 21}])
                conn.commit()
            finally:
                # Close the fixture connection on every path so that a
                # failure during setup does not keep the file open when the
                # outer 'finally' tries to remove it.
                conn.close()

            source = ('''
                    INSERT INTO test(name, lastname, age)
                    VALUES(:name, :lastname, :age)
                    ''')

            with intercept_results(DBConnection, 'connection', SQLSource, 'result') as (connection, table):
                self.assertFalse(execute([
                        ('DBConnection', identifier, [
                            ('protocol', [('String', 'sqlite')]),
                            ('db_name', [('String', test_db)]),
                        ]),
                        ('SQLSource', identifier, [
                            # the statement is handed over URL-quoted
                            ('source', [('String', urllib2.quote(source))]),
                            ('name', [('String', 'Michael')]),
                            ('lastname', [('String', 'Buck')]),
                            ('age', [('Integer', '78')]),
                        ]),
                    ],
                    [
                        # output 'connection' of module 0 feeds module 1
                        (0, 'connection', 1, 'connection'),
                    ],
                    # additional input ports on module 1 (SQLSource), one
                    # per named placeholder of the statement
                    add_port_specs=[
                        (1, 'input', 'name',
                         'org.vistrails.vistrails.basic:String'),
                        (1, 'input', 'lastname',
                         'org.vistrails.vistrails.basic:String'),
                        (1, 'input', 'age',
                         'org.vistrails.vistrails.basic:Integer'),
                    ]))

            self.assertEqual(len(connection), 1)
            connection[0].close()
            self.assertEqual(len(table), 1)
            # an INSERT yields no table
            self.assertIsNone(table[0])

            source = "SELECT name, lastname, age FROM test WHERE age > :age"

            with intercept_results(DBConnection, 'connection', SQLSource, 'result') as (connection, table):
                self.assertFalse(execute([
                        ('DBConnection', identifier, [
                            ('protocol', [('String', 'sqlite')]),
                            ('db_name', [('String', test_db)]),
                        ]),
                        ('SQLSource', identifier, [
                            ('source', [('String', urllib2.quote(source))]),
                            ('age', [('Integer', '22')]),
                        ]),
                    ],
                    [
                        (0, 'connection', 1, 'connection'),
                    ],
                    add_port_specs=[
                        (1, 'input', 'age',
                         'org.vistrails.vistrails.basic:Integer'),
                    ]))

            self.assertEqual(len(connection), 1)
            connection[0].close()
            self.assertEqual(len(table), 1)
            table, = table
            self.assertEqual(table.names, ['name', 'lastname', 'age'])
            # Rows with age > 22: John Smith (25) and Michael Buck (78).
            self.assertEqual((table.rows, table.columns), (2, 3))
            self.assertEqual(set(table.get_column(1)),
                             {'Smith', 'Buck'})
        finally:
            try:
                os.remove(test_db)
            except OSError:
                pass # Oops, we are leaking the file here...