Пример #1
0
 def test_trees(self):
     """Fit a one-hot + FastTrees pipeline on get_X_y data and check accuracy.

     Train and test inputs are obtained from ``get_X_y`` (called with
     ``sep=','``); the pipeline is fitted with an explicit label argument
     and its predictions on the test features are handed to
     ``check_accuracy`` together with the value 0.65 (presumably the
     minimum accepted accuracy -- confirm against ``check_accuracy``).
     """
     train_features, train_labels = get_X_y(train_file, label_column, sep=',')
     # The second element of the test pair is not used below: check_accuracy
     # receives test_file and label_column rather than the label values.
     test_features, _ = get_X_y(test_file, label_column, sep=',')

     encoder = OneHotVectorizer() << categorical_columns
     learner = FastTreesBinaryClassifier()
     pipeline = Pipeline([encoder, learner])

     pipeline.fit(train_features, train_labels)
     predictions = pipeline.predict(test_features)
     check_accuracy(test_file, label_column, predictions, 0.65)
Пример #2
0
 def test_trees_file(self):
     """Fit a one-hot + FastTrees pipeline from file streams and check accuracy.

     Both the training and the test inputs are ``FileDataStream`` objects
     built with ``file_schema``. The label column is supplied twice: once
     on the learner through the ``'Label'`` mapping and once as the second
     argument of ``fit``. Predictions are passed to ``check_accuracy`` with
     the value 0.65 (presumably the minimum accepted accuracy).
     """
     encoder = OneHotVectorizer() << categorical_columns
     learner = FastTreesBinaryClassifier() << {'Label': label_column}
     pipeline = Pipeline([encoder, learner])

     training_data = FileDataStream(train_file, schema=file_schema)
     pipeline.fit(training_data, label_column)

     evaluation_data = FileDataStream(test_file, schema=file_schema)
     predictions = pipeline.predict(evaluation_data)
     check_accuracy(test_file, label_column, predictions, 0.65)
Пример #3
0
 def test_linear_with_train_test_schema(self):
     """Fit a one-hot + FastLinear pipeline on get_X_y data and check accuracy.

     The learner is created with ``train_threads=1`` and ``shuffle=False``.
     Train and test inputs come from ``get_X_y`` (called with ``sep=','``);
     predictions on the test features are passed to ``check_accuracy`` with
     the value 0.65 (presumably the minimum accepted accuracy).
     """
     X_train, y_train = get_X_y(train_file, label_column, sep=',')
     # Only the features of the test pair are used; check_accuracy is given
     # test_file and label_column instead of the label values.
     X_test, _ = get_X_y(test_file, label_column, sep=',')

     steps = [
         OneHotVectorizer() << categorical_columns,
         FastLinearBinaryClassifier(train_threads=1, shuffle=False),
     ]
     pipeline = Pipeline(steps)

     pipeline.fit(X_train, y_train)
     scored = pipeline.predict(X_test)
     check_accuracy(test_file, label_column, scored, 0.65)
Пример #4
0
 def test_linear_file_role(self):
     """Fit a one-hot + FastLinear pipeline with the label set as a stream role.

     Unlike the variants that pass the label to ``fit``, this test marks
     the label on the training ``FileDataStream`` through
     ``_set_role('Label', label_column)`` and then calls ``fit`` with the
     stream only. Predictions on the test stream are passed to
     ``check_accuracy`` with the value 0.65 (presumably the minimum
     accepted accuracy).
     """
     encoder = OneHotVectorizer() << categorical_columns
     learner = FastLinearBinaryClassifier(train_threads=1, shuffle=False)
     pipeline = Pipeline([encoder, learner])

     training_data = FileDataStream(train_file, schema=file_schema)
     # The label is attached to the stream itself, so fit() gets no label.
     training_data._set_role('Label', label_column)
     pipeline.fit(training_data)

     evaluation_data = FileDataStream(test_file, schema=file_schema)
     predictions = pipeline.predict(evaluation_data)
     check_accuracy(test_file, label_column, predictions, 0.65)
Пример #5
0
    def test_linear_file(self):
        """Fit a one-hot + FastLinear pipeline from file streams and check accuracy.

        Before fitting, the test asserts that the training stream's schema
        options contain both ``'sep'`` and ``'header'``. The label column is
        passed as the second argument of ``fit``; predictions on the test
        stream go to ``check_accuracy`` with the value 0.65 (presumably the
        minimum accepted accuracy).
        """
        steps = [
            OneHotVectorizer() << categorical_columns,
            FastLinearBinaryClassifier(train_threads=1, shuffle=False),
        ]
        pipeline = Pipeline(steps)

        training_data = FileDataStream(train_file, schema=file_schema)
        # Sanity-check the schema built from file_schema before training.
        for required_option in ('sep', 'header'):
            assert required_option in training_data.schema.options
        pipeline.fit(training_data, label_column)

        evaluation_data = FileDataStream(test_file, schema=file_schema)
        predictions = pipeline.predict(evaluation_data)
        check_accuracy(test_file, label_column, predictions, 0.65)