示例#1
0
def test_pred_non_int_count_fix():
    """Cleaning turns the string '4' stored under 'ColInt' into the int 4."""
    rows = [{
        'ColInt': 3,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColInt': '4',
        'ColFloat': None,
        'ColCat': 'b',
        'ColBool': False,
    }]
    clean_predictions(rows, vschema)
    converted = rows[1]['ColInt']
    assert converted == 4
    # The cleaned rows must then be accepted by the validator.
    validate_predictions(rows, vschema)
示例#2
0
def test_pred_valid_rows_no_id():
    """Rows without '_request_id' fail validation unchanged; cleaning alters them."""
    expected = [
        {'ColInt': None, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': None, 'ColBool': False},
        {'ColInt': None, 'ColFloat': None},
    ]
    # Separate dict objects with equal contents, so any mutation is detectable.
    rows = [dict(row) for row in expected]

    assert_raises(VeritableError, validate_predictions, rows, vschema)
    # A failed validation must leave its input as it was.
    assert rows == expected

    clean_predictions(rows, vschema)
    assert rows != expected
示例#3
0
def test_pred_valid_rows_fix():
    """Cleaning rows that lack '_request_id' yields ids '0', '1' and '2'."""
    rows = [
        {'ColInt': None, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': None, 'ColBool': False},
        {'ColInt': None, 'ColFloat': None},
    ]
    expected = [
        {'_request_id': '0', 'ColInt': None, 'ColFloat': None,
         'ColCat': 'a', 'ColBool': True},
        {'_request_id': '1', 'ColInt': None, 'ColFloat': 4.1,
         'ColCat': None, 'ColBool': False},
        {'_request_id': '2', 'ColInt': None, 'ColFloat': None},
    ]
    clean_predictions(rows, vschema)
    assert rows == expected
示例#4
0
def test_pred_non_float_real_fix():
    """Cleaning turns the string '4.1' stored under 'ColFloat' into 4.1."""
    rows = [{
        'ColInt': None,
        'ColFloat': 3.1,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColInt': None,
        'ColFloat': '4.1',
        'ColCat': 'b',
        'ColBool': False,
    }]
    clean_predictions(rows, vschema)
    converted = rows[1]['ColFloat']
    assert converted == 4.1
    # The cleaned rows must then be accepted by the validator.
    validate_predictions(rows, vschema)
示例#5
0
def test_pred_nonvalid_float_real_fix():
    """Cleaning removes 'ColFloat' from a row whose value is the string 'jello'."""
    rows = [{
        'ColInt': None,
        'ColFloat': 3.1,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColInt': None,
        'ColFloat': 'jello',
        'ColCat': 'b',
        'ColBool': False,
    }]
    clean_predictions(rows, vschema)
    bad_row = rows[1]
    assert 'ColFloat' not in bad_row
    # With the bad field gone, the rows must be accepted by the validator.
    validate_predictions(rows, vschema)
示例#6
0
def test_pred_negative_int_count_fix():
    """A 'ColInt' of -3 raises with remove_invalids=False and is dropped otherwise."""
    rows = [{
        'ColInt': 3,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColInt': -3,
        'ColFloat': None,
        'ColCat': 'b',
        'ColBool': False,
    }]
    # With removal disabled the bad value must be reported as an error.
    assert_raises(
        VeritableError, clean_predictions, rows, vschema,
        remove_invalids=False)
    # Called with default arguments, cleaning removes the field instead.
    clean_predictions(rows, vschema)
    bad_row = rows[1]
    assert 'ColInt' not in bad_row
示例#7
0
def test_pred_int_count_limit_fix():
    """Cleaning removes 'ColInt' from a row whose value is 100001."""
    rows = [{
        '_request_id': '0',
        'ColInt': 3,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        '_request_id': '1',
        'ColInt': 100001,
        'ColFloat': None,
        'ColCat': 'b',
        'ColBool': False,
    }]
    clean_predictions(rows, vschema)
    oversized_row = rows[1]
    assert not ('ColInt' in oversized_row)
    # With the field gone, the rows must be accepted by the validator.
    validate_predictions(rows, vschema)
示例#8
0
def test_pred_extrafield_fix():
    """Cleaning strips the '_id' and 'ColEx' keys from the rows holding them."""
    rows = [{
        '_id': '1',
        'ColInt': 3,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColEx': None,
        'ColInt': 4,
        'ColFloat': None,
        'ColCat': 'b',
        'ColBool': False,
    }]
    clean_predictions(rows, vschema)
    first_row, second_row = rows[0], rows[1]
    assert '_id' not in first_row
    assert 'ColEx' not in second_row
    # Once the extra keys are gone, the rows must be accepted by the validator.
    validate_predictions(rows, vschema)
示例#9
0
def test_pred_nonvalid_bool_boolean_fixfail():
    """With remove_invalids=False, a 'ColBool' of 'jello' raises VeritableError.

    The raised error is expected to carry row == 1 and col == 'ColBool'.
    """
    testrows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'jello'},
    ]
    assert_raises(VeritableError, clean_predictions, testrows,
                  vschema, remove_invalids=False)
    try:
        clean_predictions(testrows, vschema, remove_invalids=False)
    except VeritableError as e:
        assert e.row == 1
        assert e.col == 'ColBool'
    else:
        # Without this branch the row/col checks above would be skipped
        # silently whenever the second call happens not to raise.
        raise AssertionError('clean_predictions did not raise VeritableError')
示例#10
0
def test_pred_negative_int_count_fixfail():
    """With remove_invalids=False, a 'ColInt' of -3 raises VeritableError.

    The raised error is expected to carry row == 1 and col == 'ColInt'.
    """
    testrows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': -3, 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    assert_raises(VeritableError, clean_predictions, testrows,
                  vschema, remove_invalids=False)
    try:
        clean_predictions(testrows, vschema, remove_invalids=False)
    except VeritableError as e:
        assert e.row == 1
        assert e.col == 'ColInt'
    else:
        # Without this branch the row/col checks above would be skipped
        # silently whenever the second call happens not to raise.
        raise AssertionError('clean_predictions did not raise VeritableError')
示例#11
0
def test_pred_valid_rows_fix():
    """Cleaning rows that lack '_request_id' yields ids '0', '1' and '2'."""
    rows = [{
        'ColInt': None,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColInt': None,
        'ColFloat': 4.1,
        'ColCat': None,
        'ColBool': False,
    }, {
        'ColInt': None,
        'ColFloat': None,
    }]
    expected = [{
        '_request_id': '0',
        'ColInt': None,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        '_request_id': '1',
        'ColInt': None,
        'ColFloat': 4.1,
        'ColCat': None,
        'ColBool': False,
    }, {
        '_request_id': '2',
        'ColInt': None,
        'ColFloat': None,
    }]
    clean_predictions(rows, vschema)
    assert rows == expected
示例#12
0
def test_pred_valid_rows_no_id():
    """Rows without '_request_id' fail validation unchanged; cleaning alters them."""
    expected = [{
        'ColInt': None,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColInt': None,
        'ColFloat': 4.1,
        'ColCat': None,
        'ColBool': False,
    }, {
        'ColInt': None,
        'ColFloat': None,
    }]
    # Separate dict objects with equal contents, so any mutation is detectable.
    rows = [dict(row) for row in expected]

    assert_raises(VeritableError, validate_predictions, rows, vschema)
    # A failed validation must leave its input as it was.
    assert rows == expected

    clean_predictions(rows, vschema)
    assert rows != expected
示例#13
0
def test_pred_non_bool_boolean_truefix():
    """Cleaning maps several true-like 'ColBool' strings to a value equal to True."""
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': '1'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': '2'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'True'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'true'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'Yes'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'YES'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'Y'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'y'},
    ]
    clean_predictions(rows, vschema)
    # Every row, including the one that already held a bool, must equal True.
    for cleaned in rows:
        flag = cleaned['ColBool']
        assert flag == True
    validate_predictions(rows, vschema)
示例#14
0
def test_pred_non_bool_boolean_falsefix():
    """Cleaning maps several false-like 'ColBool' strings to a value equal to False."""
    # First row already holds a real bool; the rest hold string spellings.
    rows = [{'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': False}]
    for raw in ('0', 'False', 'false', 'No', 'NO', 'N', 'n'):
        rows.append({
            'ColInt': None,
            'ColFloat': 4.1,
            'ColCat': 'b',
            'ColBool': raw,
        })
    clean_predictions(rows, vschema)
    for cleaned in rows:
        flag = cleaned['ColBool']
        assert flag == False
    validate_predictions(rows, vschema)
示例#15
0
def test_pred_non_str_cat_fix():
    """Cleaning turns the int 3 stored under 'ColCat' into the string '3'."""
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 3, 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    converted = rows[1]['ColCat']
    assert converted == '3'
    # The cleaned rows must then be accepted by the validator.
    validate_predictions(rows, vschema)
示例#16
0
def test_pred_non_float_real_fix():
    """Cleaning turns the string '4.1' stored under 'ColFloat' into 4.1."""
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': '4.1', 'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    converted = rows[1]['ColFloat']
    assert converted == 4.1
    # The cleaned rows must then be accepted by the validator.
    validate_predictions(rows, vschema)
示例#17
0
def test_pred_nonvalid_int_count_fix():
    """Cleaning removes 'ColInt' from a row whose value is the string 'jello'."""
    rows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': 'jello', 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    bad_row = rows[1]
    assert 'ColInt' not in bad_row
    # With the bad field gone, the rows must be accepted by the validator.
    validate_predictions(rows, vschema)
示例#18
0
def test_pred_non_bool_boolean_truefix():
    """Cleaning maps several true-like 'ColBool' strings to a value equal to True."""
    # First row already holds a real bool; the rest hold string spellings.
    rows = [{'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True}]
    for raw in ('1', '2', 'True', 'true', 'Yes', 'YES', 'Y', 'y'):
        rows.append({
            'ColInt': None,
            'ColFloat': 4.1,
            'ColCat': 'b',
            'ColBool': raw,
        })
    clean_predictions(rows, vschema)
    for cleaned in rows:
        flag = cleaned['ColBool']
        assert flag == True
    validate_predictions(rows, vschema)
示例#19
0
def test_pred_nonvalid_bool_boolean_fix():
    """Cleaning removes 'ColBool' from a row whose value is the string 'jello'."""
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'jello'},
    ]
    clean_predictions(rows, vschema)
    bad_row = rows[1]
    assert 'ColBool' not in bad_row
    # With the bad field gone, the rows must be accepted by the validator.
    validate_predictions(rows, vschema)
示例#20
0
def test_pred_non_int_count_fix():
    """Cleaning turns the string '4' stored under 'ColInt' into the int 4."""
    rows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': '4', 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    converted = rows[1]['ColInt']
    assert converted == 4
    # The cleaned rows must then be accepted by the validator.
    validate_predictions(rows, vschema)
示例#21
0
def test_pred_non_bool_boolean_falsefix():
    """Cleaning maps several false-like 'ColBool' strings to a value equal to False."""
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': False},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': '0'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'False'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'false'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'No'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'NO'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'N'},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'n'},
    ]
    clean_predictions(rows, vschema)
    # Every row, including the one that already held a bool, must equal False.
    for cleaned in rows:
        flag = cleaned['ColBool']
        assert flag == False
    validate_predictions(rows, vschema)
示例#22
0
def predict_known_target_column(data, analysis, schema, target):
    """Predict a known target column for every row of ``data`` that has it.

    Rows that do not contain ``target`` are skipped.  Each remaining row is
    copied, passed through ``clean_predictions`` and has its ``target`` entry
    set to None before the whole batch is handed to
    ``analysis.batch_predict``.

    :param data: iterable of row dicts; it is deep-copied, not modified.
    :param analysis: object providing ``batch_predict(requests)``; its
        results are indexed by request position, so it must yield at least
        one result per request.
    :param schema: schema passed on to ``clean_predictions``.
    :param target: key of the column whose known value is compared.
    :returns: list with one dict per kept row, holding the row's original
        value under 'actual' and the matching prediction result under
        'predicted'.
    """
    rows = [row for row in deepcopy(data) if target in row]
    # Second copy: the requests get mutated below, while ``rows`` must keep
    # the original target values for the 'actual' entries.
    prediction_requests = deepcopy(rows)
    clean_predictions(prediction_requests, schema)
    for prediction_request in prediction_requests:
        # NOTE(review): presumably None marks the field to be predicted --
        # confirm against the batch_predict API.
        prediction_request[target] = None
    prediction_results = list(analysis.batch_predict(prediction_requests))
    # Indexing (rather than zip) keeps the IndexError on a short result list
    # instead of silently truncating.
    return [
        {'actual': rows[i][target], 'predicted': prediction_results[i]}
        for i in range(len(prediction_requests))
    ]
示例#23
0
def test_pred_int_count_limit_fix():
    """Cleaning removes 'ColInt' from a row whose value is 100001."""
    rows = [
        {'_request_id': '0', 'ColInt': 3, 'ColFloat': None, 'ColCat': 'a',
         'ColBool': True},
        {'_request_id': '1', 'ColInt': 100001, 'ColFloat': None,
         'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    oversized_row = rows[1]
    assert not ('ColInt' in oversized_row)
    # With the field gone, the rows must be accepted by the validator.
    validate_predictions(rows, vschema)
示例#24
0
def test_pred_extrafield_fix():
    """Cleaning strips the '_id' and 'ColEx' keys from the rows holding them."""
    rows = [
        {'_id': '1', 'ColInt': 3, 'ColFloat': None, 'ColCat': 'a',
         'ColBool': True},
        {'ColEx': None, 'ColInt': 4, 'ColFloat': None, 'ColCat': 'b',
         'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    first_row, second_row = rows[0], rows[1]
    assert '_id' not in first_row
    assert 'ColEx' not in second_row
    # Once the extra keys are gone, the rows must be accepted by the validator.
    validate_predictions(rows, vschema)
示例#25
0
def test_pred_negative_int_count_fix():
    """A 'ColInt' of -3 raises with remove_invalids=False and is dropped otherwise."""
    rows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': -3, 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    # With removal disabled the bad value must be reported as an error.
    assert_raises(
        VeritableError, clean_predictions, rows, vschema,
        remove_invalids=False)
    # Called with default arguments, cleaning removes the field instead.
    clean_predictions(rows, vschema)
    bad_row = rows[1]
    assert 'ColInt' not in bad_row
示例#26
0
def test_pred_nonvalid_int_count_fixfail():
    """With remove_invalids=False, a 'ColInt' of 'jello' raises VeritableError.

    The raised error is expected to carry row == 1 and col == 'ColInt'.
    """
    testrows = [{
        'ColInt': 3,
        'ColFloat': None,
        'ColCat': 'a',
        'ColBool': True
    }, {
        'ColInt': 'jello',
        'ColFloat': None,
        'ColCat': 'b',
        'ColBool': False
    }]
    assert_raises(VeritableError,
                  clean_predictions,
                  testrows,
                  vschema,
                  remove_invalids=False)
    try:
        clean_predictions(testrows, vschema, remove_invalids=False)
    except VeritableError as e:
        assert e.row == 1
        assert e.col == 'ColInt'
    else:
        # Without this branch the row/col checks above would be skipped
        # silently whenever the second call happens not to raise.
        raise AssertionError('clean_predictions did not raise VeritableError')
示例#27
0
def test_pred_non_str_cat_fix():
    """Cleaning turns the int 3 stored under 'ColCat' into the string '3'."""
    rows = [{
        'ColInt': None,
        'ColFloat': 3.1,
        'ColCat': 'a',
        'ColBool': True,
    }, {
        'ColInt': None,
        'ColFloat': 4.1,
        'ColCat': 3,
        'ColBool': False,
    }]
    clean_predictions(rows, vschema)
    converted = rows[1]['ColCat']
    assert converted == '3'
    # The cleaned rows must then be accepted by the validator.
    validate_predictions(rows, vschema)