Python StratifiedKFold示例

编程语言: Python

命名空间/包名称: turicreate_cross_validation.cross_validation

类/类型: StratifiedKFold

hotexamples.com的示例: 9

Python StratifiedKFold - 已找到9个示例。这些是从开源项目中提取的、最受好评的turicreate_cross_validation.cross_validation.StratifiedKFold真实Python示例。您可以对示例进行评价，以帮助我们提高示例质量。

常用方法

显示隐藏

StratifiedKFold(9)

示例#1

显示文件

def test_stratified_kfold_split_size():
    """Check the train/test sizes produced by 10-fold and 5-fold splits.

    The input is a 100-row SFrame whose 'label' column holds 50 zeros
    followed by 50 ones.
    """
    columns = {"id": range(100), 'label': [0] * 50 + [1] * 50}
    data = tc.SFrame(columns)
    # Each entry: (fold count, expected train rows, expected test rows).
    expectations = ((10, 90, 10), (5, 80, 20))
    for n_folds, train_rows, test_rows in expectations:
        for train, test in StratifiedKFold(data, 'label', n_folds):
            assert len(train) == train_rows
            assert len(test) == test_rows

示例#2

显示文件

def test_stratified_kfold_label_dist():
    """Check the per-label row counts in every train/test split.

    Covers a 50/50 label column split into 10 and into 5 folds, and a
    90/10 label column split into 10 folds.
    """
    # Each scenario: (label column, fold count,
    #                 expected (label, rows) pairs in train,
    #                 expected (label, rows) pairs in test).
    scenarios = (
        ([0] * 50 + [1] * 50, 10, ((0, 45), (1, 45)), ((0, 5), (1, 5))),
        ([0] * 50 + [1] * 50, 5, ((0, 40), (1, 40)), ((0, 10), (1, 10))),
        ([0] * 90 + [1] * 10, 10, ((0, 81), (1, 9)), ((0, 9), (1, 1))),
    )
    for labels, n_folds, train_expected, test_expected in scenarios:
        data = tc.SFrame({"id": range(100), 'label': labels})
        for train, test in StratifiedKFold(data, 'label', n_folds):
            for value, rows in train_expected:
                assert len(train[train["label"] == value]) == rows
            for value, rows in test_expected:
                assert len(test[test["label"] == value]) == rows

示例#3

显示文件

def test_cross_val_basic():
    """Run cross_val_score on data where 'id' lines up exactly with 'label'.

    Rows with id "a" all carry label 0 and rows with id "b" all carry
    label 1; the test expects every reported metric to equal 1.0.
    """
    ids = ["a"] * 50 + ["b"] * 50
    labels = [0] * 50 + [1] * 50
    data = tc.SFrame({"id": ids, 'label': labels})
    folds = StratifiedKFold(data, 'label', 5)
    metrics = cross_val_score(
        folds, tc.decision_tree_classifier.create, {'target': 'label'})
    expected = dict.fromkeys(('recall', 'auc', 'precision', 'accuracy'), 1.0)
    assert metrics == expected

示例#4

显示文件

文件： cross_val_example.py 项目： shalom08/turicreate-cross-validation

import turicreate as tc
from turicreate_cross_validation.cross_validation import shuffle_sframe, StratifiedKFold, cross_val_score

if __name__ == "__main__":
    # Keyword arguments handed to cross_val_score alongside the model factory.
    params = {'target': 'label'}

    # Load the mushroom dataset straight from its CSV URL.
    sf = tc.SFrame.read_csv(
        'https://static.turi.com/datasets/xgboost/mushroom.csv')

    # Turn the raw label into a boolean target: True where it equals 'p'.
    is_positive = sf['label'] == 'p'
    sf['label'] = is_positive

    # Shuffle the rows, then build 5 folds stratified on 'label'.
    sf = shuffle_sframe(sf)
    folds = StratifiedKFold(sf, 'label', 5)

    # Cross-validate a random forest classifier over the folds.
    cross_val_score(folds, tc.random_forest_classifier.create, params)

示例#5

显示文件

def test_StratifiedKFold_with_wrong_label():
    """Expect ToolkitError when stratifying on a column absent from the data.

    Both construction and iteration sit inside the ``pytest.raises`` block,
    so the error may surface at either point.
    """
    columns = {"id": range(100), 'label': [0] * 50 + [1] * 50}
    data = tc.SFrame(columns)
    with pytest.raises(ToolkitError):
        # 'label2' is not a column of ``data``.
        for train, test in StratifiedKFold(data, 'label2', 5):
            pass

示例#6

显示文件

def test_cross_val_score_with_wrong_label():
    """Expect ToolkitError when the model target names a missing column.

    The folds themselves are built on the valid 'label' column; only the
    'target' entry passed to cross_val_score refers to 'label2'.
    """
    columns = {"id": range(100), 'label': [0] * 50 + [1] * 50}
    data = tc.SFrame(columns)
    folds = StratifiedKFold(data, 'label', 5)
    bad_params = {'target': 'label2'}
    with pytest.raises(ToolkitError):
        cross_val_score(folds, tc.random_forest_classifier.create, bad_params)

示例#7

显示文件

def test_stratified_kfold_split_intersect():
    """Check that each split's train and test parts share no rows.

    The input has 100 rows with distinct ids, so the train and test parts
    combined must contain exactly 100 distinct rows.
    """
    data = tc.SFrame({"id": range(100), 'label': [0] * 50 + [1] * 50})
    for train, test in StratifiedKFold(data, 'label', 10):
        # De-duplicate AFTER combining. De-duplicating each part first and
        # then appending yields len(train.unique()) + len(test.unique())
        # whether or not a row appears in both parts, so an overlap would
        # go unnoticed.
        combined = train.append(test)
        assert 100 == len(combined.unique())

示例#8

显示文件

def test_stratified_kfold_split_unique():
    """Check that neither part of any 10-fold split contains duplicate rows."""
    columns = {"id": range(100), 'label': [0] * 50 + [1] * 50}
    data = tc.SFrame(columns)
    for train, test in StratifiedKFold(data, 'label', 10):
        # Train is checked before test, one assertion per part.
        for part in (train, test):
            assert len(part) == len(part.unique())

示例#9

显示文件

def test_stratified_kfold_split_number():
    """Check that iterating StratifiedKFold yields one split per requested fold."""
    columns = {"id": range(100), 'label': [0] * 50 + [1] * 50}
    data = tc.SFrame(columns)
    for n_folds in (10, 5):
        splits = list(StratifiedKFold(data, 'label', n_folds))
        assert len(splits) == n_folds