def test_vw_config_manager():
    expected_set = {
        "--no_stdin",
        "--quiet",
        "--loss_function=logistic",
        "--data=test/train-sets/rcv1_small.dat",
    }
    expected_reductions = {"gd", "scorer-identity", "count_label"}

    vw = vowpalwabbit.Workspace(
        arg_str="--loss_function logistic -d test/train-sets/rcv1_small.dat --quiet")
    config = vw.get_config()
    enabled_reductions = vw.get_enabled_reductions()
    cmd_str_list = helper_options_to_list_strings(config)

    assert set(cmd_str_list) == expected_set
    assert set(enabled_reductions) == expected_reductions

    vw.finish()

    # do another iteration generating the cmd string from the output of previous
    new_args = " ".join(cmd_str_list)

    other_vw = vowpalwabbit.Workspace(new_args)
    new_config = other_vw.get_config()
    new_cmd_str_list = helper_options_to_list_strings(new_config)

    assert set(new_cmd_str_list) == expected_set

    other_vw.finish()
def test_ccb_single_slot_and_cb_equivalence_no_slot_features():
    # --- CCB
    ccb_model_file_name = "model_file_ccb_equiv.txt"
    ccb_workspace = vowpalwabbit.Workspace(
        quiet=True,
        predict_only_model=True,
        ccb_explore_adf=True,
        readable_model=ccb_model_file_name,
    )

    ccb_ex = """
ccb shared |User b
ccb action |Action d
ccb action |Action e
ccb action |Action f
ccb action |Action ff
ccb action |Action fff
ccb slot 4:1:0.2 |
"""
    ccb_workspace.learn(ccb_ex)
    ccb_workspace.finish()
    ccb_num_weights = count_weights_from_readable_model_file_for_equiv_test(
        ccb_model_file_name
    )

    # --- CB
    cb_model_file_name = "model_file_cb_equiv.txt"
    cb_workspace = vowpalwabbit.Workspace(
        quiet=True,
        predict_only_model=True,
        cb_explore_adf=True,
        readable_model=cb_model_file_name,
    )

    cb_ex = """
shared |User b
|Action d
|Action e
|Action f
|Action ff
4:1:0.2 |Action fff
"""
    cb_workspace.learn(cb_ex)
    cb_workspace.finish()
    cb_num_weights = count_weights_from_readable_model_file_for_equiv_test(
        cb_model_file_name
    )

    assert ccb_num_weights == cb_num_weights
def test_cats_pdf():
    min_value = 10
    max_value = 20

    vw = vowpalwabbit.Workspace("--cats_pdf 4 --min_value " + str(min_value) +
                                " --max_value " + str(max_value) +
                                " --bandwidth 1")
    vw_example = vw.parse("ca 15:0.657567:6.20426e-05 | f1 f2 f3 f4",
                          vowpalwabbit.LabelType.CONTINUOUS)
    vw.learn(vw_example)
    vw.finish_example(vw_example)

    assert (vw.get_prediction_type() == vowpalwabbit.PredictionType.PDF
            ), "prediction_type should be pdf"

    pdf_segments = vw.predict("| f1 f2 f3 f4")
    mass = 0
    for segment in pdf_segments:
        assert len(segment) == 3
        # returned action range should lie within supplied limits
        assert segment[0] >= min_value
        assert segment[0] <= max_value
        assert segment[1] >= min_value
        assert segment[1] <= max_value
        # pdf value must be non-negative
        assert segment[2] >= 0
        mass += (segment[1] - segment[0]) * segment[2]

    # the returned pdf should integrate to approximately 1 over the action range
    assert mass >= 0.9999 and mass <= 1.0001

    vw.finish()
def test_getting_started_example_with():
    train_df, test_df = helper_get_data()

    # with syntax calls into vw.finish() automatically.
    # you actually want to use 'with vowpalwabbit.Workspace("--cb 4") as vw:'
    # but we need to assert on vw.finished for test purposes
    vw = vowpalwabbit.Workspace("--cb 4")
    with vw as vw:
        for i in train_df.index:
            action = train_df.loc[i, "action"]
            cost = train_df.loc[i, "cost"]
            probability = train_df.loc[i, "probability"]
            feature1 = train_df.loc[i, "feature1"]
            feature2 = train_df.loc[i, "feature2"]
            feature3 = train_df.loc[i, "feature3"]

            learn_example = (str(action) + ":" + str(cost) + ":" +
                             str(probability) + " | " + str(feature1) + " " +
                             str(feature2) + " " + str(feature3))
            vw.learn(learn_example)

        assert (vw.get_prediction_type() == vw.pMULTICLASS
                ), "prediction_type should be multiclass"

        for j in test_df.index:
            feature1 = test_df.loc[j, "feature1"]
            feature2 = test_df.loc[j, "feature2"]
            feature3 = test_df.loc[j, "feature3"]
            choice = vw.predict("| " + str(feature1) + " " + str(feature2) +
                                " " + str(feature3))
            assert isinstance(choice, int), "choice should be int"
            assert choice == 3, "predicted action should be 3"

    assert vw.finished == True, "with syntax should finish() vw instance"
def test_ccb_single_slot_and_cb_non_equivalence_with_slot_features():
    # --- CCB
    ccb_model_file_name = "model_file_ccb_no_equiv.txt"
    ccb_workspace = vowpalwabbit.Workspace(
        quiet=True, ccb_explore_adf=True, readable_model=ccb_model_file_name
    )

    ccb_ex = """
ccb shared |User b
ccb action |Action d
ccb action |Action e
ccb action |Action f
ccb action |Action ff
ccb action |Action fff
ccb slot 4:1:0.2 | slot_feature_1
"""
    ccb_workspace.learn(ccb_ex)
    ccb_workspace.finish()
    ccb_num_weights = count_weights_from_readable_model_file_for_equiv_test(
        ccb_model_file_name
    )

    # --- CB
    cb_model_file_name = "model_file_cb_no_equiv.txt"
    cb_workspace = vowpalwabbit.Workspace(
        quiet=True, cb_explore_adf=True, readable_model=cb_model_file_name
    )

    cb_ex = """
shared |User b
|Action d
|Action e
|Action f
|Action ff
4:1:0.2 |Action fff
"""
    cb_workspace.learn(cb_ex)
    cb_workspace.finish()
    cb_num_weights = count_weights_from_readable_model_file_for_equiv_test(
        cb_model_file_name
    )

    # Since there was at least one slot feature supplied, the equivalent mode
    # does not apply and so we expect there to be more weights in the CCB model.
    assert ccb_num_weights > cb_num_weights
def test_MulticlassLabel_example():
    n = 4
    model = vowpalwabbit.Workspace(loss_function="logistic", oaa=n, quiet=True)
    ex = model.example("1 | a b c d", 2)

    ml2 = vowpalwabbit.MulticlassLabel.from_example(ex)
    assert ml2.label == 1
    assert ml2.weight == 1.0
    assert ml2.prediction == 0
    assert str(ml2) == "1"
def test_ccb_non_slot_none_outcome():
    model = vowpalwabbit.Workspace(quiet=True, ccb_explore_adf=True)
    example = vowpalwabbit.Example(
        vw=model, labelType=vowpalwabbit.LabelType.CONDITIONAL_CONTEXTUAL_BANDIT
    )
    label = example.get_label(vowpalwabbit.CCBLabel)
    # CCB label is set to UNSET by default.
    assert label.type == vowpalwabbit.CCBLabelType.UNSET
    assert label.outcome is None
def helper_getting_started_example(which_cb):
    train_df, test_df = helper_get_data()

    vw = vowpalwabbit.Workspace(which_cb + " 4 --log_level off --cb_type mtr",
                                enable_logging=True)

    for i in train_df.index:
        action = train_df.loc[i, "action"]
        cost = train_df.loc[i, "cost"]
        probability = train_df.loc[i, "probability"]
        feature1 = train_df.loc[i, "feature1"]
        feature2 = train_df.loc[i, "feature2"]
        feature3 = train_df.loc[i, "feature3"]

        learn_example = (str(action) + ":" + str(cost) + ":" +
                         str(probability) + " | " + str(feature1) + " " +
                         str(feature2) + " " + str(feature3))
        vw.learn(learn_example)

    assert (vw.get_prediction_type() == vw.pMULTICLASS
            ), "prediction_type should be multiclass"

    for j in test_df.index:
        feature1 = test_df.loc[j, "feature1"]
        feature2 = test_df.loc[j, "feature2"]
        feature3 = test_df.loc[j, "feature3"]
        choice = vw.predict("| " + str(feature1) + " " + str(feature2) + " " +
                            str(feature3))
        assert isinstance(choice, int), "choice should be int"
        assert choice == 3, "predicted action should be 3 instead of " + str(
            choice)

    # test that metrics is empty since "--extra_metrics filename" was not supplied
    assert len(vw.get_learner_metrics()) == 0

    vw.finish()

    output = vw.get_log()

    if which_cb.find("legacy") != -1:
        test_file = "test-sets/ref/python_test_cb_legacy.stderr"
    else:
        test_file = "test-sets/ref/python_test_cb.stderr"

    print("Output received:")
    print("----------------")
    print("\n".join(output))
    print("----------------")

    with open(path.join(helper_get_test_dir(), test_file), "r") as file:
        expected = file.readlines()
        for expected_line, output_line in zip(expected, output):
            output_line = output_line.replace("...", "").strip()
            expected_line = expected_line.replace("...", "").strip()
            assert not is_line_different(output_line, expected_line, 0.001)
def test_MulticlassProbabilitiesLabel():
    n = 4
    model = vowpalwabbit.Workspace(loss_function="logistic", oaa=n,
                                   probabilities=True, quiet=True)
    ex = model.example("1 | a b c d", 2)
    model.learn(ex)

    mpl = vowpalwabbit.MulticlassProbabilitiesLabel.from_example(ex)
    assert str(mpl) == "1:0.25 2:0.25 3:0.25 4:0.25"

    mpl = vowpalwabbit.MulticlassProbabilitiesLabel([0.4, 0.3, 0.3])
    assert str(mpl) == "1:0.4 2:0.3 3:0.3"
def helper_getting_started_example(which_cb):
    train_df, test_df = helper_get_data()

    vw = vowpalwabbit.Workspace(which_cb + " 4 --log_level off",
                                enable_logging=True)

    for i in train_df.index:
        action = train_df.loc[i, "action"]
        cost = train_df.loc[i, "cost"]
        probability = train_df.loc[i, "probability"]
        feature1 = train_df.loc[i, "feature1"]
        feature2 = train_df.loc[i, "feature2"]
        feature3 = train_df.loc[i, "feature3"]

        learn_example = (str(action) + ":" + str(cost) + ":" +
                         str(probability) + " | " + str(feature1) + " " +
                         str(feature2) + " " + str(feature3))
        vw.learn(learn_example)

    assert (vw.get_prediction_type() == vw.pMULTICLASS
            ), "prediction_type should be multiclass"

    for j in test_df.index:
        feature1 = test_df.loc[j, "feature1"]
        feature2 = test_df.loc[j, "feature2"]
        feature3 = test_df.loc[j, "feature3"]
        choice = vw.predict("| " + str(feature1) + " " + str(feature2) + " " +
                            str(feature3))
        assert isinstance(choice, int), "choice should be int"
        assert choice == 3, "predicted action should be 3 instead of " + str(
            choice)

    # test that metrics is empty since "--extra_metrics filename" was not supplied
    assert len(vw.get_learner_metrics()) == 0

    vw.finish()

    output = vw.get_log()

    if which_cb.find("legacy") != -1:
        test_file = "test-sets/ref/python_test_cb_legacy.stderr"
    else:
        test_file = "test-sets/ref/python_test_cb.stderr"

    with open(path.join(helper_get_test_dir(), test_file), "r") as file:
        expected = file.readlines()
        for expected_line, output_line in zip(expected, output):
            assert output_line == expected_line, ("line mismatch; expected: " +
                                                  expected_line + " output: " +
                                                  output_line)
def test_cats():
    min_value = 10
    max_value = 20

    vw = vowpalwabbit.Workspace("--cats 4 --min_value " + str(min_value) +
                                " --max_value " + str(max_value) +
                                " --bandwidth 1")
    vw_example = vw.parse("ca 15:0.657567:6.20426e-05 | f1 f2 f3 f4",
                          vowpalwabbit.LabelType.CONTINUOUS)
    vw.learn(vw_example)
    vw.finish_example(vw_example)

    assert (vw.get_prediction_type() ==
            vowpalwabbit.PredictionType.ACTION_PDF_VALUE
            ), "prediction_type should be action_pdf_value"

    action, pdf_value = vw.predict("| f1 f2 f3 f4")
    assert action >= 10
    assert action <= 20

    vw.finish()
def test_dsjson():
    vw = vowpalwabbit.Workspace("--cb_explore_adf --epsilon 0.2 --dsjson")

    ex_l_str = '{"_label_cost":-1.0,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    ex_l = vw.parse(ex_l_str)
    vw.learn(ex_l)
    pred = ex_l[0].get_action_scores()
    expected = [0.5, 0.5]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)
    vw.finish_example(ex_l)

    ex_p = '{"_label_cost":-1.0,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    pred = vw.predict(ex_p)
    expected = [0.9, 0.1]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)
def test_dsjson_with_metrics():
    vw = vowpalwabbit.Workspace(
        "--extra_metrics metrics.json --cb_explore_adf --epsilon 0.2 --dsjson")

    ex_l_str = '{"_label_cost":-0.9,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    ex_l = vw.parse(ex_l_str)
    vw.learn(ex_l)
    pred = ex_l[0].get_action_scores()
    expected = [0.5, 0.5]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)
    vw.finish_example(ex_l)

    ex_p = '{"_label_cost":-1.0,"_label_probability":0.5,"_label_Action":1,"_labelIndex":0,"o":[{"v":1.0,"EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","ActionTaken":false}],"Timestamp":"2020-11-15T17:09:31.8350000Z","Version":"1","EventId":"38cbf24f-70b2-4c76-aa0c-970d0c8d388e","a":[1,2],"c":{ "GUser":{"id":"person5","major":"engineering","hobby":"hiking","favorite_character":"spock"}, "_multi": [ { "TAction":{"topic":"SkiConditions-VT"} }, { "TAction":{"topic":"HerbGarden"} } ] },"p":[0.5,0.5],"VWState":{"m":"N/A"}}\n'
    pred = vw.predict(ex_p)
    expected = [0.9, 0.1]
    assert len(pred) == len(expected)
    for a, b in zip(pred, expected):
        assert isclose(a, b)

    learner_metric_dict = vw.get_learner_metrics()
    assert len(vw.get_learner_metrics()) == 17

    assert learner_metric_dict["total_predict_calls"] == 2
    assert learner_metric_dict["total_learn_calls"] == 1
    assert learner_metric_dict["cbea_labeled_ex"] == 1
    assert learner_metric_dict["cbea_predict_in_learn"] == 0
    assert learner_metric_dict["cbea_label_first_action"] == 1
    assert learner_metric_dict["cbea_label_not_first"] == 0
    assert pytest.approx(learner_metric_dict["cbea_sum_cost"]) == -0.9
    assert pytest.approx(learner_metric_dict["cbea_sum_cost_baseline"]) == -0.9
    assert learner_metric_dict["cbea_non_zero_cost"] == 1
    assert pytest.approx(learner_metric_dict["cbea_avg_feat_per_event"]) == 24
    assert pytest.approx(
        learner_metric_dict["cbea_avg_actions_per_event"]) == 2
    assert pytest.approx(learner_metric_dict["cbea_avg_ns_per_event"]) == 16
    assert pytest.approx(learner_metric_dict["cbea_avg_feat_per_action"]) == 12
    assert pytest.approx(learner_metric_dict["cbea_avg_ns_per_action"]) == 8
    assert learner_metric_dict["cbea_min_actions"] == 2
    assert learner_metric_dict["cbea_max_actions"] == 2
    assert learner_metric_dict["sfm_count_learn_example_with_shared"] == 1
def main():
    opts = sys.argv[1:]
    vowpalwabbit.Workspace(" ".join(opts))
def test_constructor_exception_is_safe():
    try:
        vw = vowpalwabbit.Workspace("--invalid_option")
    except:
        pass
def test_not_runparser_cmd_string():
    vw = vowpalwabbit.Workspace("")
    assert vw.parser_ran == False, "vw should set parser_ran to false"
    vw.finish()
def test_runparser_cmd_string_short():
    vw = vowpalwabbit.Workspace("-d ./test/train-sets/rcv1_small.dat")
    assert vw.parser_ran == True, "vw should set parser_ran to true if --data present"
    vw.finish()
def main():
    vowpalwabbit.Workspace(arg_list=sys.argv[1:])