def test_should_predict_the_correct_answer(train_data, test_question, expected_answer):
    """Check that the pipeline predicts ``expected_answer`` for ``test_question``.

    Mock train/test data sets are built from ``train_data`` and
    ``test_question``, the similarity pipeline is run on them, and the
    "answer" column of the resulting predictions is compared with
    ``expected_answer``.

    The first element of ``test_question`` is used as the row label when
    looking up the prediction (presumably the question id -- verify against
    the fixture that supplies it).
    """
    train_set, test_set = create_mock_train_test_aristo_data(train_data, test_question)
    pipeline = SimilarityPipeline(train_set, test_set)
    pipeline.run_pipeline()

    row_label = test_question[0]
    predicted_answer = pipeline.predictions.loc[row_label, "answer"]
    assert predicted_answer == expected_answer
# Example #2
# 0
def run_train_data(train_data_csv):
    """Run the similarity pipeline on ``train_data_csv`` and save the results.

    Two ``AristoData`` objects are created from the same CSV: one with
    ``range(0, 2000)`` used as training data and one with ``range(100, 110)``
    used as test data. Both summaries are printed, the pipeline is run, and
    its output is written to a freshly created, timestamped directory
    ``../../../outputdata/train_<YYYYmmdd_HHMMSS>`` relative to this module.

    NOTE(review): the test range (100-110) lies inside the training range
    (0-2000); if these ranges select rows, the evaluated rows are also
    trained on -- confirm this is intended.
    """
    train_set = AristoData(train_data_csv, range(0, 2000))
    test_set = AristoData(train_data_csv, range(100, 110))
    # The test summary is printed first, then the training summary.
    for dataset in (test_set, train_set):
        dataset.print_summary()

    similarity = SimilarityPipeline(train_data=train_set, test_data=test_set)
    similarity.run_pipeline()

    module_dir = os.path.dirname(__file__)
    timestamp = time.strftime('%Y%m%d_%H%M%S')
    target_dir = os.path.join(
        module_dir, "../../../outputdata/train_{}".format(timestamp)
    )

    # makedirs raises if the directory already exists (e.g. two runs within
    # the same second), so an earlier run's output is never overwritten.
    os.makedirs(target_dir)
    similarity.write_to_disk(target_dir)
def test_should_write_test_data_with_predictions_to_file(tmpdir, train_data, test_question, expected_answer):
    """Check that ``write_to_disk`` creates ``test_data_with_predictions.csv``.

    A pipeline is built from mock train/test data and asked to write its
    output into the per-test temporary directory; the test then verifies
    that the expected CSV file exists there.

    ``expected_answer`` is not used in the body; it stays in the signature
    so the test keeps accepting the same arguments as before.
    """
    mock_train_data, mock_test_data = create_mock_train_test_aristo_data(train_data, test_question)
    sut = SimilarityPipeline(mock_train_data, mock_test_data)

    # Write into the temporary directory itself. ``tmpdir.dirname`` is its
    # *parent*, which is shared by every test in the session, so a file left
    # there by another test could make this check pass spuriously.
    out_dir = str(tmpdir)
    sut.write_to_disk(out_dir)

    assert os.path.exists(os.path.join(out_dir, "test_data_with_predictions.csv"))