def test_pred_non_int_count_fix():
    """Check that cleaning turns the string '4' in ColInt into the int 4.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    rows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': '4', 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    cleaned_value = rows[1]['ColInt']
    assert cleaned_value == 4
    validate_predictions(rows, vschema)
def test_pred_valid_rows_no_id():
    """Check validation rejects rows lacking '_request_id' without mutating them.

    validate_predictions must raise VeritableError and leave the rows equal
    to the reference copy; clean_predictions must then change them.
    """
    expected = [
        {'ColInt': None, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': None, 'ColBool': False},
        {'ColInt': None, 'ColFloat': None},
    ]
    # Independent per-row copies so in-place cleaning cannot touch `expected`.
    rows = [dict(row) for row in expected]

    assert_raises(VeritableError, validate_predictions, rows, vschema)
    assert rows == expected

    clean_predictions(rows, vschema)
    assert rows != expected
def test_pred_valid_rows_fix():
    """Check that cleaning valid rows only adds a '_request_id' to each.

    The expected ids are the row positions rendered as strings
    ('0', '1', '2').
    """
    rows = [
        {'ColInt': None, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': None, 'ColBool': False},
        {'ColInt': None, 'ColFloat': None},
    ]
    # Build the reference rows before cleaning mutates `rows` in place.
    expected = []
    for position, row in enumerate(rows):
        labelled = dict(row)
        labelled['_request_id'] = str(position)
        expected.append(labelled)

    clean_predictions(rows, vschema)
    assert rows == expected
def test_pred_non_float_real_fix():
    """Check that cleaning turns the string '4.1' in ColFloat into 4.1.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': '4.1', 'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    cleaned_value = rows[1]['ColFloat']
    assert cleaned_value == 4.1
    validate_predictions(rows, vschema)
def test_pred_nonvalid_float_real_fix():
    """Check that cleaning drops a ColFloat value of 'jello' from its row.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 'jello', 'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    assert 'ColFloat' not in rows[1]
    validate_predictions(rows, vschema)
def test_pred_negative_int_count_fix():
    """Check how cleaning handles a ColInt value of -3.

    With remove_invalids=False clean_predictions must raise VeritableError;
    with the default arguments it must drop ColInt from the offending row.
    """
    rows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': -3, 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    assert_raises(
        VeritableError, clean_predictions, rows, vschema,
        remove_invalids=False)
    clean_predictions(rows, vschema)
    assert 'ColInt' not in rows[1]
def test_pred_int_count_limit_fix():
    """Check that cleaning drops a ColInt value of 100001 from its row.

    Presumably 100001 exceeds a limit for this column; the limit itself is
    not visible here.  After cleaning, validate_predictions must not raise.
    """
    rows = [
        {'_request_id': '0', 'ColInt': 3, 'ColFloat': None,
         'ColCat': 'a', 'ColBool': True},
        {'_request_id': '1', 'ColInt': 100001, 'ColFloat': None,
         'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    oversized_row = rows[1]
    assert 'ColInt' not in oversized_row
    validate_predictions(rows, vschema)
def test_pred_extrafield_fix():
    """Check that cleaning strips the '_id' and 'ColEx' keys from the rows.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    rows = [
        {'_id': '1', 'ColInt': 3, 'ColFloat': None,
         'ColCat': 'a', 'ColBool': True},
        {'ColEx': None, 'ColInt': 4, 'ColFloat': None,
         'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    assert '_id' not in rows[0]
    assert 'ColEx' not in rows[1]
    validate_predictions(rows, vschema)
def test_pred_nonvalid_bool_boolean_fixfail():
    """Check the error raised for a ColBool value of 'jello'.

    With remove_invalids=False, clean_predictions must raise VeritableError,
    and the exception must report row 1 and column 'ColBool'.
    """
    testrows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'jello'},
    ]
    assert_raises(
        VeritableError, clean_predictions, testrows, vschema,
        remove_invalids=False)
    try:
        clean_predictions(testrows, vschema, remove_invalids=False)
    except VeritableError as e:
        assert e.row == 1
        assert e.col == 'ColBool'
    else:
        # Without this branch the row/col checks above would be skipped
        # silently if the second call did not raise.
        raise AssertionError(
            'clean_predictions did not raise VeritableError')
def test_pred_negative_int_count_fixfail():
    """Check the error raised for a ColInt value of -3.

    With remove_invalids=False, clean_predictions must raise VeritableError,
    and the exception must report row 1 and column 'ColInt'.
    """
    testrows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': -3, 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    assert_raises(
        VeritableError, clean_predictions, testrows, vschema,
        remove_invalids=False)
    try:
        clean_predictions(testrows, vschema, remove_invalids=False)
    except VeritableError as e:
        assert e.row == 1
        assert e.col == 'ColInt'
    else:
        # Without this branch the row/col checks above would be skipped
        # silently if the second call did not raise.
        raise AssertionError(
            'clean_predictions did not raise VeritableError')
def test_pred_valid_rows_fix():
    """Check that cleaning valid rows only adds a '_request_id' to each.

    The cleaned rows must equal the reference rows, which carry the ids
    '0', '1' and '2' in row order.
    """
    expected = [
        {'_request_id': '0', 'ColInt': None, 'ColFloat': None,
         'ColCat': 'a', 'ColBool': True},
        {'_request_id': '1', 'ColInt': None, 'ColFloat': 4.1,
         'ColCat': None, 'ColBool': False},
        {'_request_id': '2', 'ColInt': None, 'ColFloat': None},
    ]
    rows = [
        {'ColInt': None, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': None, 'ColBool': False},
        {'ColInt': None, 'ColFloat': None},
    ]
    clean_predictions(rows, vschema)
    assert rows == expected
def test_pred_valid_rows_no_id():
    """Check validation rejects rows lacking '_request_id' without mutating them.

    validate_predictions must raise VeritableError and leave the rows equal
    to the reference copy; clean_predictions must then change them.
    """
    def fresh_rows():
        # A new list of new dicts on every call, so the reference and the
        # rows under test never share objects.
        return [
            {'ColInt': None, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
            {'ColInt': None, 'ColFloat': 4.1, 'ColCat': None,
             'ColBool': False},
            {'ColInt': None, 'ColFloat': None},
        ]

    expected = fresh_rows()
    rows = fresh_rows()

    assert_raises(VeritableError, validate_predictions, rows, vschema)
    assert rows == expected

    clean_predictions(rows, vschema)
    assert rows != expected
def test_pred_non_bool_boolean_truefix():
    """Check that cleaning maps several string spellings in ColBool to True.

    Covers '1', '2', 'True', 'true', 'Yes', 'YES', 'Y' and 'y' alongside a
    row that already holds True.  After cleaning, validate_predictions must
    not raise.
    """
    truthy_spellings = ['1', '2', 'True', 'true', 'Yes', 'YES', 'Y', 'y']
    rows = [{'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True}]
    for spelling in truthy_spellings:
        rows.append(
            {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b',
             'ColBool': spelling})

    clean_predictions(rows, vschema)
    for row in rows:
        assert row['ColBool'] == True
    validate_predictions(rows, vschema)
def test_pred_non_bool_boolean_falsefix():
    """Check that cleaning maps several string spellings in ColBool to False.

    Covers '0', 'False', 'false', 'No', 'NO', 'N' and 'n' alongside a row
    that already holds False.  After cleaning, validate_predictions must
    not raise.
    """
    falsy_spellings = ['0', 'False', 'false', 'No', 'NO', 'N', 'n']
    rows = [{'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': False}]
    for spelling in falsy_spellings:
        rows.append(
            {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b',
             'ColBool': spelling})

    clean_predictions(rows, vschema)
    for row in rows:
        assert row['ColBool'] == False
    validate_predictions(rows, vschema)
def test_pred_non_str_cat_fix():
    """Check that cleaning turns the int 3 in ColCat into the string '3'.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 3, 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    cleaned_value = rows[1]['ColCat']
    assert cleaned_value == '3'
    validate_predictions(rows, vschema)
def test_pred_non_float_real_fix():
    """Check that cleaning turns the string '4.1' in ColFloat into 4.1.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    valid_row = {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a',
                 'ColBool': True}
    stringly_row = {'ColInt': None, 'ColFloat': '4.1', 'ColCat': 'b',
                    'ColBool': False}
    rows = [valid_row, stringly_row]
    clean_predictions(rows, vschema)
    assert rows[1]['ColFloat'] == 4.1
    validate_predictions(rows, vschema)
def test_pred_nonvalid_int_count_fix():
    """Check that cleaning drops a ColInt value of 'jello' from its row.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    rows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': 'jello', 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    clean_predictions(rows, vschema)
    assert 'ColInt' not in rows[1]
    validate_predictions(rows, vschema)
def test_pred_non_bool_boolean_truefix():
    """Check that cleaning maps several string spellings in ColBool to True.

    Covers '1', '2', 'True', 'true', 'Yes', 'YES', 'Y' and 'y' alongside a
    row that already holds True.  After cleaning, validate_predictions must
    not raise.
    """
    already_true = {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a',
                    'ColBool': True}
    from_strings = [
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': text}
        for text in ('1', '2', 'True', 'true', 'Yes', 'YES', 'Y', 'y')
    ]
    rows = [already_true] + from_strings

    clean_predictions(rows, vschema)
    for row in rows:
        assert row['ColBool'] == True
    validate_predictions(rows, vschema)
def test_pred_nonvalid_bool_boolean_fix():
    """Check that cleaning drops a ColBool value of 'jello' from its row.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    rows = [
        {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': 'jello'},
    ]
    clean_predictions(rows, vschema)
    assert 'ColBool' not in rows[1]
    validate_predictions(rows, vschema)
def test_pred_non_int_count_fix():
    """Check that cleaning turns the string '4' in ColInt into the int 4.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    valid_row = {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a',
                 'ColBool': True}
    stringly_row = {'ColInt': '4', 'ColFloat': None, 'ColCat': 'b',
                    'ColBool': False}
    rows = [valid_row, stringly_row]
    clean_predictions(rows, vschema)
    assert rows[1]['ColInt'] == 4
    validate_predictions(rows, vschema)
def test_pred_non_bool_boolean_falsefix():
    """Check that cleaning maps several string spellings in ColBool to False.

    Covers '0', 'False', 'false', 'No', 'NO', 'N' and 'n' alongside a row
    that already holds False.  After cleaning, validate_predictions must
    not raise.
    """
    already_false = {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a',
                     'ColBool': False}
    from_strings = [
        {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 'b', 'ColBool': text}
        for text in ('0', 'False', 'false', 'No', 'NO', 'N', 'n')
    ]
    rows = [already_false] + from_strings

    clean_predictions(rows, vschema)
    for row in rows:
        assert row['ColBool'] == False
    validate_predictions(rows, vschema)
def predict_known_target_column(data, analysis, schema, target):
    """Predict a known target column and pair each prediction with the truth.

    Only rows of ``data`` that contain ``target`` are used.  For each such
    row a request is built from a cleaned copy with ``target`` set to None,
    and all requests are sent through ``analysis.batch_predict``.

    Args:
        data: Iterable of dict rows; it is deep-copied, never mutated.
        analysis: Object exposing ``batch_predict(requests)``, whose result
            is iterated into a list.
        schema: Schema passed to ``clean_predictions`` with the requests.
        target: Key of the column to predict.

    Returns:
        A list with one dict per request, holding the row's original value
        under 'actual' and the matching prediction under 'predicted'.

    Raises:
        IndexError: If ``batch_predict`` yields fewer results than requests.
    """
    rows = [row for row in deepcopy(data) if target in row]
    # Cleaning and blanking the target happen on a second copy so that
    # `rows` keeps the actual values.
    prediction_requests = deepcopy(rows)
    clean_predictions(prediction_requests, schema)
    for prediction_request in prediction_requests:
        prediction_request[target] = None
    prediction_results = list(analysis.batch_predict(prediction_requests))
    # Pair by index rather than zip() so that a short result list raises
    # IndexError instead of being silently truncated.
    return [
        {'actual': rows[i][target], 'predicted': prediction_results[i]}
        for i in range(len(prediction_requests))
    ]
def test_pred_int_count_limit_fix():
    """Check that cleaning drops a ColInt value of 100001 from its row.

    Presumably 100001 exceeds a limit for this column; the limit itself is
    not visible here.  After cleaning, validate_predictions must not raise.
    """
    within_limit = {'_request_id': '0', 'ColInt': 3, 'ColFloat': None,
                    'ColCat': 'a', 'ColBool': True}
    over_limit = {'_request_id': '1', 'ColInt': 100001, 'ColFloat': None,
                  'ColCat': 'b', 'ColBool': False}
    rows = [within_limit, over_limit]
    clean_predictions(rows, vschema)
    assert 'ColInt' not in rows[1]
    validate_predictions(rows, vschema)
def test_pred_extrafield_fix():
    """Check that cleaning strips the '_id' and 'ColEx' keys from the rows.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    row_with_id = {'_id': '1', 'ColInt': 3, 'ColFloat': None,
                   'ColCat': 'a', 'ColBool': True}
    row_with_extra = {'ColEx': None, 'ColInt': 4, 'ColFloat': None,
                      'ColCat': 'b', 'ColBool': False}
    rows = [row_with_id, row_with_extra]
    clean_predictions(rows, vschema)
    assert '_id' not in rows[0]
    assert 'ColEx' not in rows[1]
    validate_predictions(rows, vschema)
def test_pred_negative_int_count_fix():
    """Check how cleaning handles a ColInt value of -3.

    With remove_invalids=False clean_predictions must raise VeritableError;
    with the default arguments it must drop ColInt from the offending row.
    """
    valid_row = {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a',
                 'ColBool': True}
    negative_row = {'ColInt': -3, 'ColFloat': None, 'ColCat': 'b',
                    'ColBool': False}
    rows = [valid_row, negative_row]
    assert_raises(
        VeritableError, clean_predictions, rows, vschema,
        remove_invalids=False)
    clean_predictions(rows, vschema)
    assert 'ColInt' not in rows[1]
def test_pred_nonvalid_int_count_fixfail():
    """Check the error raised for a ColInt value of 'jello'.

    With remove_invalids=False, clean_predictions must raise VeritableError,
    and the exception must report row 1 and column 'ColInt'.
    """
    testrows = [
        {'ColInt': 3, 'ColFloat': None, 'ColCat': 'a', 'ColBool': True},
        {'ColInt': 'jello', 'ColFloat': None, 'ColCat': 'b', 'ColBool': False},
    ]
    assert_raises(
        VeritableError, clean_predictions, testrows, vschema,
        remove_invalids=False)
    try:
        clean_predictions(testrows, vschema, remove_invalids=False)
    except VeritableError as e:
        assert e.row == 1
        assert e.col == 'ColInt'
    else:
        # Without this branch the row/col checks above would be skipped
        # silently if the second call did not raise.
        raise AssertionError(
            'clean_predictions did not raise VeritableError')
def test_pred_non_str_cat_fix():
    """Check that cleaning turns the int 3 in ColCat into the string '3'.

    After cleaning, validate_predictions is called on the rows and must
    not raise.
    """
    string_cat_row = {'ColInt': None, 'ColFloat': 3.1, 'ColCat': 'a',
                      'ColBool': True}
    int_cat_row = {'ColInt': None, 'ColFloat': 4.1, 'ColCat': 3,
                   'ColBool': False}
    rows = [string_cat_row, int_cat_row]
    clean_predictions(rows, vschema)
    assert rows[1]['ColCat'] == '3'
    validate_predictions(rows, vschema)