def test_header_only(live_server):
    """Scoring the ``header_only.csv`` fixture must fail as an empty input.

    ``run_batch_predictions`` is expected to raise ``ValueError`` whose
    message names the offending dataset path.
    """
    endpoint = '{}/api/v1/'.format(live_server.url())
    mock_ui = PickableMock()
    options = dict(
        base_url=endpoint,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93d',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file='out.csv',
        keep_cols=None,
        delimiter=',',
        dataset='tests/fixtures/header_only.csv',
        pred_name=None,
        timeout=30,
        ui=mock_ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
    )

    with pytest.raises(ValueError) as ctx:
        run_batch_predictions(**options)

    message = str(ctx.value)
    assert message == "Input file 'tests/fixtures/header_only.csv' is empty."
def test_explicit_delimiter_gzip(live_server):
    """Score the gzipped temperatura fixture with an explicit ',' delimiter.

    The run is considered successful when ``run_batch_predictions``
    returns ``None``.
    """
    endpoint = '{}/api/v1/'.format(live_server.url())
    mock_ui = PickableMock()
    options = dict(
        base_url=endpoint,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93d',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file='out.csv',
        keep_cols=None,
        delimiter=',',
        dataset='tests/fixtures/temperatura_predict.csv.gz',
        pred_name=None,
        timeout=30,
        ui=mock_ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
    )

    result = run_batch_predictions(**options)

    assert result is None
def test_bad_newline(live_server):
    """Score ``diabetes_bad_newline.csv`` and check empty rows are dropped.

    After the run, ``out.csv`` must contain exactly five lines and the UI
    must have been warned that empty rows were detected and discarded.
    """
    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())

    run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93d',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file='out.csv',
        keep_cols=None,
        delimiter=',',
        dataset='tests/fixtures/diabetes_bad_newline.csv',
        pred_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
    )

    # Use a context manager so the output file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open('out.csv', 'rb') as out_file:
        lines = len(out_file.readlines())

    assert lines == 5
    ui.warning.assert_any_call('Detected empty rows in the CSV file. '
                               'These rows will be discarded.')
def test_create_and_acquire_api_token(live_server):
    """Acquire a token with the create flag set and check what is reported.

    The returned token must be ``'Som3tok3n'``; the UI must receive an
    info message about the acquisition and a debug message with the token.
    """
    endpoint = '{}/api/v1/'.format(live_server.url())
    mock_ui = PickableMock()

    token = acquire_api_token(
        endpoint, {}, 'username', 'password', True, mock_ui)

    assert token == 'Som3tok3n'
    mock_ui.info.assert_called_with('api-token acquired')
    mock_ui.debug.assert_called_with('api-token: Som3tok3n')
def test_acquire_api_token_bad_status(live_server):
    """A 500 response to the token request surfaces as ``ValueError``.

    Nothing may be logged through ``ui.info`` or ``ui.debug`` on failure.
    """
    endpoint = '{}/api/v1/'.format(live_server.url())
    mock_ui = PickableMock()

    with pytest.raises(ValueError) as ctx:
        acquire_api_token(
            endpoint, {}, 'bad_status', 'passwd', False, mock_ui)

    assert not mock_ui.info.called
    assert not mock_ui.debug.called
    message = str(ctx.value)
    assert message == 'api_token request returned status code 500'
def test_acquire_api_token_unauthorized(live_server):
    """The ``'unknown'`` user is rejected with a 'wrong credentials' error.

    Nothing may be logged through ``ui.info`` or ``ui.debug`` on failure.
    """
    endpoint = '{}/api/v1/'.format(live_server.url())
    mock_ui = PickableMock()

    with pytest.raises(ValueError) as ctx:
        acquire_api_token(
            endpoint, {}, 'unknown', 'passwd', False, mock_ui)

    assert not mock_ui.info.called
    assert not mock_ui.debug.called
    message = str(ctx.value)
    assert message == 'wrong credentials'
def test_acquire_api_token_no_token2(live_server):
    """The ``'no_token2'`` user without the create flag raises ``ValueError``.

    The error must tell the caller to rerun with ``--create_api_token``,
    and nothing may be logged through ``ui.info`` or ``ui.debug``.
    """
    endpoint = '{}/api/v1/'.format(live_server.url())
    mock_ui = PickableMock()

    with pytest.raises(ValueError) as ctx:
        acquire_api_token(
            endpoint, {}, 'no_token2', 'passwd', False, mock_ui)

    assert not mock_ui.info.called
    assert not mock_ui.debug.called
    message = str(ctx.value)
    assert message == ('no api-token registered; '
                       'please run with --create_api_token flag.')
def test_quotechar_in_keep_cols(live_server):
    """Check quoted values in kept columns beyond the detection sample.

    A temporary CSV is assembled from three fixtures: the header, enough
    repetitions of the first body part to reach at least
    ``DETECT_SAMPLE_SIZE_SLOW`` bytes, and finally the "bad" part. The
    file is scored with ``keep_cols=["b", "c"]`` and the last line of
    ``out.csv`` must start with the expected doubled-quote value.
    """
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ui = PickableMock()

    # Read the fixtures through context managers so no handle is leaked.
    with open("tests/fixtures/quotes_input_head.csv", "rb") as src:
        head = src.read()
    with open("tests/fixtures/quotes_input_first_part.csv", "rb") as src:
        body_1 = src.read()
    with open("tests/fixtures/quotes_input_bad_part.csv", "rb") as src:
        body_2 = src.read()

    # delete=False keeps the file on disk after it is closed, so it can be
    # handed to run_batch_predictions by name below.
    with tempfile.NamedTemporaryFile(prefix='test_',
                                     suffix='.csv',
                                     delete=False) as fd:
        fd.write(head)
        size = 0
        while size < DETECT_SAMPLE_SIZE_SLOW:
            fd.write(body_1)
            size += len(body_1)
        fd.write(body_2)
    # Leaving the ``with`` block flushes and closes the temporary file
    # before it is scored.

    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93d',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file='out.csv',
        keep_cols=["b", "c"],
        delimiter=None,
        dataset=fd.name,
        pred_name=None,
        timeout=None,
        ui=ui,
        auto_sample=True,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
    )
    assert ret is None

    with open("out.csv", "rb") as out_file:
        last_line = out_file.readlines()[-1]
    expected_last_line = b'1044,2,"eeeeeeee ""eeeeee"" eeeeeeeeeeee'
    assert last_line[:len(expected_last_line)] == expected_last_line
def check_regression_jp(live_server, tmpdir, fast_mode, gzipped):
    """Use utf8 encoded input data.

    Scores ``tests/fixtures/regression_jp.csv`` (with a ``.gz`` suffix
    when ``gzipped`` is true) and compares the written output with
    ``tests/fixtures/regression_output_jp.csv``.

    :param live_server: fixture whose ``url()`` gives the server address.
    :param tmpdir: temporary directory fixture receiving the output file.
    :param fast_mode: forwarded to ``run_batch_predictions``; also selects
        the output file name (``out_fast.csv`` vs ``out.csv``).
    :param gzipped: when true, the ``.gz`` variant of the dataset is used.
    """
    # Function-scope import: ``io.open`` gives universal-newline text mode
    # with an explicit encoding on both Python 2 and Python 3.
    import io

    out_fname = 'out_fast.csv' if fast_mode else 'out.csv'
    out = tmpdir.join(out_fname)
    dataset_suffix = '.gz' if gzipped else ''

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93e',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=500,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/regression_jp.csv' + dataset_suffix,
        pred_name='new_name',
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=fast_mode,
        dry_run=False,
        encoding='',
        skip_dialect=False,
        compression=True
    )
    assert ret is None

    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11 (ValueError). io.open keeps
    # universal newlines and decodes the expected file as UTF-8, the same
    # encoding used to read the actual output above.
    with io.open('tests/fixtures/regression_output_jp.csv',
                 encoding='utf-8') as f:
        assert actual == f.read()
def test_pred_threshold_classification(live_server, tmpdir, func_params):
    """Score with custom prediction and threshold column names.

    Runs ``temperatura_predict.csv`` with ``pred_name='healthy'`` and
    ``pred_threshold_name='threshold'`` and compares the output with
    ``temperatura_output_healthy_threshold.csv``.

    :param func_params: mapping supplying ``deployment_id``, ``pid`` and
        ``lid`` for the run.
    """
    # Function-scope import: ``io.open`` gives universal-newline text mode
    # with an explicit encoding on both Python 2 and Python 3.
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        deployment_id=func_params['deployment_id'],
        pid=func_params['pid'],
        lid=func_params['lid'],
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/temperatura_predict.csv',
        pred_name='healthy',
        pred_threshold_name='threshold',
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False
    )
    assert ret is None

    # This is the output produced by the run, hence "actual".
    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11 (ValueError); io.open keeps
    # universal newlines and decodes as UTF-8 like the read above.
    with io.open(
        'tests/fixtures/temperatura_output_healthy_threshold.csv',
        encoding='utf-8'
    ) as f:
        assert actual == f.read(), actual
def test_prediction_explanations_keepcols(live_server, tmpdir):
    """Score with prediction explanations while keeping input columns.

    Runs ``10kDiabetes.csv`` with ``max_prediction_explanations=5`` and
    two kept columns, then compares the output with
    ``10kDiabetes_5explanations_keepcols.csv``.
    """
    # Function-scope import: ``io.open`` gives universal-newline text mode
    # with an explicit encoding on both Python 2 and Python 3.
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='5afb150782c7dd45fcc03951',
        lid='5b2cad28aa1d12847310acf4',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=['medical_specialty', 'number_diagnoses'],
        delimiter=None,
        dataset='tests/fixtures/10kDiabetes.csv',
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False,
        max_prediction_explanations=5
    )
    assert ret is None

    actual = out.read_text('utf-8')
    file_path = 'tests/fixtures/10kDiabetes_5explanations_keepcols.csv'
    # The 'rU' mode was removed in Python 3.11 (ValueError); io.open keeps
    # universal newlines and decodes as UTF-8 like the read above.
    with io.open(file_path, encoding='utf-8') as f:
        expected = f.read()
    # Both sides are decoded text, so they are compared directly.
    assert actual == expected, expected
def test_simple_transferable(live_server, tmpdir):
    """Score using an ``import_id`` instead of project and model ids.

    Runs ``regression_predict.csv`` with ``pid`` and ``lid`` set to
    ``None`` and compares the output with ``regression_output.csv``.
    """
    # Function-scope import: ``io.open`` gives universal-newline text mode
    # with an explicit encoding on both Python 2 and Python 3.
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        import_id='0ec5bcea7f0f45918fa88257bfe42c09',
        pid=None,
        lid=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/regression_predict.csv',
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False
    )
    assert ret is None

    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11 (ValueError); io.open keeps
    # universal newlines and decodes as UTF-8 like the read above.
    with io.open('tests/fixtures/regression_output.csv',
                 encoding='utf-8') as f:
        expected = f.read()
    # Both sides are decoded text, so they are compared directly.
    assert actual == expected, expected
def test_simple_with_unicode(live_server, tmpdir, func_params, dataset_name):
    """Score a fixture chosen by ``dataset_name`` and compare the output.

    The output must match ``tests/fixtures/jpReview_books_reg_out.csv``.

    :param func_params: mapping supplying ``deployment_id``, ``pid`` and
        ``lid`` for the run.
    :param dataset_name: file name of the dataset inside
        ``tests/fixtures/``.
    """
    # Function-scope import: ``io.open`` gives universal-newline text mode
    # with an explicit encoding on both Python 2 and Python 3.
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/predApi/v1.0/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        deployment_id=func_params['deployment_id'],
        pid=func_params['pid'],
        lid=func_params['lid'],
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/{}'.format(dataset_name),
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False
    )
    assert ret is None

    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11 (ValueError); io.open keeps
    # universal newlines and decodes as UTF-8 like the read above.
    with io.open('tests/fixtures/jpReview_books_reg_out.csv',
                 encoding='utf-8') as f:
        expected = f.read()
    # Both sides are decoded text, so they are compared directly.
    assert actual == expected, expected
def test_simple_api_v1(live_server, tmpdir):
    """Score the gzipped temperatura fixture through the ``/api/v1/`` URL.

    The output must match ``tests/fixtures/temperatura_api_v1_output.csv``.
    """
    # Function-scope import: ``io.open`` gives universal-newline text mode
    # with an explicit encoding on both Python 2 and Python 3.
    import io

    out = tmpdir.join('out.csv')

    ui = PickableMock()
    base_url = '{webhost}/api/v1/'.format(webhost=live_server.url())
    ret = run_batch_predictions(
        base_url=base_url,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        pid='56dd9570018e213242dfa93c',
        lid='56dd9570018e213242dfa93f',
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(out),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/temperatura_predict.csv.gz',
        pred_name=None,
        timeout=None,
        ui=ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='',
        skip_dialect=False
    )
    assert ret is None

    actual = out.read_text('utf-8')
    # The 'rU' mode was removed in Python 3.11 (ValueError); io.open keeps
    # universal newlines and decodes as UTF-8 like the read above.
    with io.open('tests/fixtures/temperatura_api_v1_output.csv',
                 encoding='utf-8') as f:
        expected = f.read()
    # Both sides are decoded text, so they are compared directly.
    assert actual == expected, expected
def test_simple_with_wrong_encoding(live_server, tmpdir, func_params):
    """Passing ``encoding='cp932'`` for this fixture must fail to decode.

    ``run_batch_predictions`` is expected to raise ``UnicodeDecodeError``
    mentioning the ``'cp932'`` codec.
    """
    output_path = tmpdir.join('out.csv')
    endpoint = '{}/predApi/v1.0/'.format(live_server.url())
    mock_ui = PickableMock()
    options = dict(
        base_url=endpoint,
        base_headers={},
        user='******',
        pwd='password',
        api_token=None,
        create_api_token=False,
        deployment_id=func_params['deployment_id'],
        pid=func_params['pid'],
        lid=func_params['lid'],
        import_id=None,
        n_retry=3,
        concurrent=1,
        resume=False,
        n_samples=10,
        out_file=str(output_path),
        keep_cols=None,
        delimiter=None,
        dataset='tests/fixtures/jpReview_books_reg.csv',
        pred_name=None,
        pred_threshold_name=None,
        pred_decision_name=None,
        timeout=None,
        ui=mock_ui,
        auto_sample=False,
        fast_mode=False,
        dry_run=False,
        encoding='cp932',
        skip_dialect=False,
    )

    with pytest.raises(UnicodeDecodeError) as execinfo:
        run_batch_predictions(**options)

    # The fixture is encoded as 'utf-8', but the run is told to decode it
    # as 'cp932', so the codec error must surface to the caller.
    error_text = str(execinfo.value)
    assert "'cp932' codec can't decode byte" in error_text