def test_same_predictions_country_as_categorical_sklearn_model_binary_reward(self):
    """Tests sklearn Linear model + binary reward.

    Round-trips a fitted linear-bandit predictor through config/model
    serialization and asserts that the pre- and post-serialization
    predictors produce the same scores and ids for one observation.
    """
    reward_type = "binary"
    raw_data = shuffle(Datasets._raw_data_binary_reward)
    # Use a fixed row so pre/post serialization predictions are comparable.
    test_input = raw_data.iloc[0]
    data = preprocessor.preprocess_data(
        raw_data,
        Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL,
        reward_type,
        shuffle_data=False,  # don't shuffle so we can test the same observation
    )
    _X, _y = preprocessor.data_to_pytorch(data)
    offset = Datasets._offset_binary_reward
    X_COUNTRY_CATEG_BINARY_REWARD = {
        "X_train": {"X_float": _X["X_float"][:offset]},
        "y_train": _y[:offset],
        "X_test": {"X_float": _X["X_float"][offset:]},
        "y_test": _y[offset:],
    }
    model = model_constructors.build_linear_model(reward_type=reward_type)
    pre_serialized_predictor = BanditPredictor(
        feature_config=Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL,
        float_feature_order=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "float_feature_order"
        ],
        id_feature_order=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "id_feature_order"
        ],
        id_feature_str_to_int_map=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "id_feature_str_to_int_map"
        ],
        transforms=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD["transforms"],
        imputers=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD["imputers"],
        model=model,
        model_type="linear_bandit",
        reward_type=reward_type,
        model_spec=None,
    )
    # Fit for its side effect on `model` (shared with the predictor); the
    # returned fitted wrapper was previously bound to an unused local.
    model_trainers.fit_sklearn_model(
        reward_type=reward_type,
        model=model,
        X=X_COUNTRY_CATEG_BINARY_REWARD["X_train"],
        y=X_COUNTRY_CATEG_BINARY_REWARD["y_train"],
    )
    pre_serialized_predictor.config_to_file(self.tmp_config_path)
    pre_serialized_predictor.model_to_file(self.tmp_net_path)
    post_serialized_predictor = BanditPredictor.predictor_from_file(
        self.tmp_config_path, self.tmp_net_path
    )
    pre_pred = pre_serialized_predictor.predict(json.loads(test_input.context))
    post_pred = post_serialized_predictor.predict(json.loads(test_input.context))
    # NOTE(review): self.tol is passed positionally, so it acts as `rtol`
    # in np.allclose — confirm that relative (not absolute) tolerance is
    # the intent.
    assert np.allclose(pre_pred["scores"], post_pred["scores"], self.tol)
    assert pre_pred["ids"] == post_pred["ids"]
def test_same_predictions_country_as_dense_id_list(self):
    """Tests pytorch net + country as a dense ID-list feature.

    Builds and fits a custom pytorch module, round-trips the predictor
    through serialization, and asserts identical predictions for one
    observation before and after serialization.
    """
    raw_data = shuffle(Datasets._raw_data)
    # Use a fixed row so pre/post serialization predictions are comparable.
    test_input = raw_data.iloc[0]
    data = preprocessor.preprocess_data(
        raw_data,
        Params.FEATURE_CONFIG_COUNTRY_AS_DENSE_ID_LIST,
        Params.ML_CONFIG["reward_type"],
        shuffle_data=False,  # don't shuffle so we can test the same observation
    )
    _X, _y = preprocessor.data_to_pytorch(data)
    offset = Datasets._offset
    X_COUNTRY_DENSE_ID_LIST = {
        "X_train": {"X_float": _X["X_float"][:offset]},
        "y_train": _y[:offset],
        "X_test": {"X_float": _X["X_float"][offset:]},
        "y_test": _y[offset:],
    }
    model_spec, pytorch_net = model_constructors.build_pytorch_net(
        feature_specs=Params.FEATURE_CONFIG_COUNTRY_AS_DENSE_ID_LIST["features"],
        product_sets=Params.FEATURE_CONFIG_COUNTRY_AS_DENSE_ID_LIST[
            "product_sets"
        ],
        float_feature_order=Datasets.DATA_COUNTRY_DENSE_ID_LIST[
            "final_float_feature_order"
        ],
        id_feature_order=Datasets.DATA_COUNTRY_DENSE_ID_LIST[
            "final_id_feature_order"
        ],
        reward_type=Params.ML_CONFIG["reward_type"],
        layers=self.model_params["layers"],
        activations=self.model_params["activations"],
        input_dim=trainer.num_float_dim(Datasets.DATA_COUNTRY_DENSE_ID_LIST),
    )
    pre_serialized_predictor = BanditPredictor(
        feature_config=Params.FEATURE_CONFIG_COUNTRY_AS_DENSE_ID_LIST,
        float_feature_order=Datasets.DATA_COUNTRY_DENSE_ID_LIST[
            "float_feature_order"
        ],
        id_feature_order=Datasets.DATA_COUNTRY_DENSE_ID_LIST["id_feature_order"],
        id_feature_str_to_int_map=Datasets.DATA_COUNTRY_DENSE_ID_LIST[
            "id_feature_str_to_int_map"
        ],
        transforms=Datasets.DATA_COUNTRY_DENSE_ID_LIST["transforms"],
        imputers=Datasets.DATA_COUNTRY_DENSE_ID_LIST["imputers"],
        model=pytorch_net,
        model_type=self.model_type,
        reward_type=Params.ML_CONFIG["reward_type"],
        model_spec=model_spec,
    )
    # Fit for its side effect on the predictor's module; the returned
    # skorch wrapper was previously bound to an unused local.
    model_trainers.fit_custom_pytorch_module_w_skorch(
        reward_type=Params.ML_CONFIG["reward_type"],
        model=pre_serialized_predictor.model,
        X=X_COUNTRY_DENSE_ID_LIST["X_train"],
        y=X_COUNTRY_DENSE_ID_LIST["y_train"],
        hyperparams=self.model_params,
    )
    pre_serialized_predictor.config_to_file(self.tmp_config_path)
    pre_serialized_predictor.model_to_file(self.tmp_net_path)
    post_serialized_predictor = BanditPredictor.predictor_from_file(
        self.tmp_config_path, self.tmp_net_path
    )
    pre_pred = pre_serialized_predictor.predict(json.loads(test_input.context))
    post_pred = post_serialized_predictor.predict(json.loads(test_input.context))
    # NOTE(review): self.tol is passed positionally (acts as rtol).
    assert np.allclose(pre_pred["scores"], post_pred["scores"], self.tol)
    assert pre_pred["ids"] == post_pred["ids"]
def test_same_predictions_country_as_categorical_binary_reward(self):
    """Tests pytorch net + binary reward + country as categorical.

    Round-trips a fitted pytorch predictor through serialization and
    asserts identical predictions for: a normal observation, an empty
    context (missing features), and a context with an unknown key.
    """
    reward_type = "binary"
    raw_data = shuffle(Datasets._raw_data_binary_reward)
    # Use a fixed row so pre/post serialization predictions are comparable.
    test_input = raw_data.iloc[0]
    data = preprocessor.preprocess_data(
        raw_data,
        Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL,
        reward_type,
        shuffle_data=False,  # don't shuffle so we can test the same observation
    )
    _X, _y = preprocessor.data_to_pytorch(data)
    offset = Datasets._offset_binary_reward
    X_COUNTRY_CATEG_BINARY_REWARD = {
        "X_train": {"X_float": _X["X_float"][:offset]},
        "y_train": _y[:offset],
        "X_test": {"X_float": _X["X_float"][offset:]},
        "y_test": _y[offset:],
    }
    model_spec, pytorch_net = model_constructors.build_pytorch_net(
        feature_specs=Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL["features"],
        product_sets=Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL["product_sets"],
        float_feature_order=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "final_float_feature_order"
        ],
        id_feature_order=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "final_id_feature_order"
        ],
        reward_type=reward_type,
        layers=self.model_params["layers"],
        activations=self.model_params["activations"],
        input_dim=trainer.num_float_dim(Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD),
    )
    pre_serialized_predictor = BanditPredictor(
        feature_config=Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL,
        float_feature_order=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "float_feature_order"
        ],
        id_feature_order=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "id_feature_order"
        ],
        id_feature_str_to_int_map=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD[
            "id_feature_str_to_int_map"
        ],
        transforms=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD["transforms"],
        imputers=Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD["imputers"],
        model=pytorch_net,
        model_type=self.model_type,
        reward_type=reward_type,
        model_spec=model_spec,
    )
    # Fit for its side effect on the predictor's module; the returned
    # skorch wrapper was previously bound to an unused local.
    model_trainers.fit_custom_pytorch_module_w_skorch(
        reward_type=reward_type,
        model=pre_serialized_predictor.model,
        X=X_COUNTRY_CATEG_BINARY_REWARD["X_train"],
        y=X_COUNTRY_CATEG_BINARY_REWARD["y_train"],
        hyperparams=self.model_params,
    )
    pre_serialized_predictor.config_to_file(self.tmp_config_path)
    pre_serialized_predictor.model_to_file(self.tmp_net_path)
    post_serialized_predictor = BanditPredictor.predictor_from_file(
        self.tmp_config_path, self.tmp_net_path
    )
    pre_pred = pre_serialized_predictor.predict(json.loads(test_input.context))
    post_pred = post_serialized_predictor.predict(json.loads(test_input.context))
    # NOTE(review): self.tol is passed positionally (acts as rtol).
    assert np.allclose(pre_pred["scores"], post_pred["scores"], self.tol)
    assert pre_pred["ids"] == post_pred["ids"]

    # add a test case for missing features in provided context
    pre_pred_missing_feature = pre_serialized_predictor.predict({})
    post_pred_missing_feature = post_serialized_predictor.predict({})
    assert np.allclose(
        pre_pred_missing_feature["scores"],
        post_pred_missing_feature["scores"],
        self.tol,
    )
    assert pre_pred_missing_feature["ids"] == post_pred_missing_feature["ids"]

    # add a test case for garbage feature keys provided in context
    pre_pred_garbage_feature = pre_serialized_predictor.predict({"blah": 42})
    post_pred_garbage_feature = post_serialized_predictor.predict({"blah": 42})
    assert np.allclose(
        pre_pred_garbage_feature["scores"],
        post_pred_garbage_feature["scores"],
        self.tol,
    )
    assert pre_pred_garbage_feature["ids"] == post_pred_garbage_feature["ids"]
class Datasets:
    """Module-import-time test fixtures: raw CSVs preprocessed into
    train/test splits for each feature-config variant.

    All work below runs once at class-definition time. Attributes
    prefixed ``DATA_*`` hold preprocessor outputs; ``X_*`` hold the
    corresponding train/test tensors keyed "X_train"/"y_train"/
    "X_test"/"y_test".
    """

    # Location of the CSV fixtures relative to this test file.
    TEST_DIR = os.path.dirname(os.path.abspath(__file__))
    TEST_DATASET_DIR = "datasets"

    # continuous reward
    TEST_DATASET_FILENAME = "height_dataset.csv"
    DATASET_PATH = os.path.join(TEST_DIR, TEST_DATASET_DIR,
                                TEST_DATASET_FILENAME)
    _raw_data = pd.read_csv(DATASET_PATH)
    # Row index that splits train from test.
    _offset = int(len(_raw_data) * Params.ML_CONFIG["train_percent"])

    # dataset for country as categorical variable
    DATA_COUNTRY_CATEG = preprocessor.preprocess_data(
        _raw_data,
        Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL,
        Params.ML_CONFIG["reward_type"],
    )
    # NOTE: _X/_y are reused as scratch temporaries for each variant
    # below; after the class body runs they hold the LAST variant's
    # tensors.
    _X, _y = preprocessor.data_to_pytorch(DATA_COUNTRY_CATEG)
    X_COUNTRY_CATEG = {
        "X_train": {
            "X_float": _X["X_float"][:_offset]
        },
        "y_train": _y[:_offset],
        "X_test": {
            "X_float": _X["X_float"][_offset:]
        },
        "y_test": _y[_offset:],
    }

    # dataset for country as ID list variable
    DATA_COUNTRY_ID_LIST = preprocessor.preprocess_data(
        _raw_data,
        Params.FEATURE_CONFIG_COUNTRY_AS_ID_LIST,
        Params.ML_CONFIG["reward_type"],
    )
    _X, _y = preprocessor.data_to_pytorch(DATA_COUNTRY_ID_LIST)
    # ID-list variants additionally carry the id-list tensors and their
    # index tensors.
    X_COUNTRY_ID_LIST = {
        "X_train": {
            "X_float": _X["X_float"][:_offset],
            "X_id_list": _X["X_id_list"][:_offset],
            "X_id_list_idxs": _X["X_id_list_idxs"][:_offset],
        },
        "y_train": _y[:_offset],
        "X_test": {
            "X_float": _X["X_float"][_offset:],
            "X_id_list": _X["X_id_list"][_offset:],
            "X_id_list_idxs": _X["X_id_list_idxs"][_offset:],
        },
        "y_test": _y[_offset:],
    }

    # dataset for country as dense ID list variable
    DATA_COUNTRY_DENSE_ID_LIST = preprocessor.preprocess_data(
        _raw_data,
        Params.FEATURE_CONFIG_COUNTRY_AS_DENSE_ID_LIST,
        Params.ML_CONFIG["reward_type"],
    )
    _X, _y = preprocessor.data_to_pytorch(DATA_COUNTRY_DENSE_ID_LIST)
    X_COUNTRY_DENSE_ID_LIST = {
        "X_train": {
            "X_float": _X["X_float"][:_offset]
        },
        "y_train": _y[:_offset],
        "X_test": {
            "X_float": _X["X_float"][_offset:]
        },
        "y_test": _y[_offset:],
    }

    # dataset for country as ID list AND decision as ID list variables
    DATA_COUNTRY_AND_DECISION_ID_LIST = preprocessor.preprocess_data(
        _raw_data,
        Params.FEATURE_CONFIG_COUNTRY_AND_DECISION_AS_ID_LIST,
        Params.ML_CONFIG["reward_type"],
    )
    _X, _y = preprocessor.data_to_pytorch(DATA_COUNTRY_AND_DECISION_ID_LIST)
    X_COUNTRY_AND_DECISION_ID_LIST = {
        "X_train": {
            "X_float": _X["X_float"][:_offset],
            "X_id_list": _X["X_id_list"][:_offset],
            "X_id_list_idxs": _X["X_id_list_idxs"][:_offset],
        },
        "y_train": _y[:_offset],
        "X_test": {
            "X_float": _X["X_float"][_offset:],
            "X_id_list": _X["X_id_list"][_offset:],
            "X_id_list_idxs": _X["X_id_list_idxs"][_offset:],
        },
        "y_test": _y[_offset:],
    }

    # binary reward
    TEST_BINARY_REWARD_DATASET_FILENAME = "height_dataset_binary.csv"
    BINARY_REWARD_DATASET_PATH = os.path.join(
        TEST_DIR, TEST_DATASET_DIR, TEST_BINARY_REWARD_DATASET_FILENAME)
    _raw_data_binary_reward = pd.read_csv(BINARY_REWARD_DATASET_PATH)
    # Separate split offset: the binary-reward CSV may differ in length.
    _offset_binary_reward = int(
        len(_raw_data_binary_reward) * Params.ML_CONFIG["train_percent"])

    # dataset for country as categorical variable & binary reward
    DATA_COUNTRY_CATEG_BINARY_REWARD = preprocessor.preprocess_data(
        _raw_data_binary_reward,
        Params.FEATURE_CONFIG_COUNTRY_AS_CATEGORICAL,
        "binary",
    )
    _X_binary_reward, _y_binary_reward = preprocessor.data_to_pytorch(
        DATA_COUNTRY_CATEG_BINARY_REWARD)
    X_COUNTRY_CATEG_BINARY_REWARD = {
        "X_train": {
            "X_float": _X_binary_reward["X_float"][:_offset_binary_reward]
        },
        "y_train": _y_binary_reward[:_offset_binary_reward],
        "X_test": {
            "X_float": _X_binary_reward["X_float"][_offset_binary_reward:]
        },
        "y_test": _y_binary_reward[_offset_binary_reward:],
    }