Example #1
0
    def test_pytorch_model_country_as_dense_id_list(self):
        """Train a PyTorch net on the country-as-dense-id-list dataset and
        check its final validation MSE against the GBDT and MLP baselines."""
        experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_DENSE_ID_LIST
        dataset = Datasets.DATA_COUNTRY_DENSE_ID_LIST

        net_spec, pytorch_net = train_bandit.build_pytorch_net(
            feature_specs=experiment_params["features"],
            product_sets=experiment_params["product_sets"],
            float_feature_order=dataset["final_float_feature_order"],
            id_feature_order=dataset["final_id_feature_order"],
            layers=Params.ML_PARAMS["model"]["layers"],
            activations=Params.ML_PARAMS["model"]["activations"],
            input_dim=train_bandit.num_float_dim(dataset),
        )

        fitted_net = train_bandit.fit_custom_pytorch_module_w_skorch(
            module=pytorch_net,
            X=Datasets.X_COUNTRY_DENSE_ID_LIST["X_train"],
            y=Datasets.X_COUNTRY_DENSE_ID_LIST["y_train"],
            hyperparams=Params.ML_PARAMS,
        )

        # Validation loss recorded on the last training epoch.
        valid_mse = fitted_net.history[-1]["valid_loss"]

        # MSE should beat, or come close to, the out-of-the-box GBDT & MLP.
        # The GBDT needs less training, so the tolerance is forgiving.
        assert valid_mse < self.results_gbdt["mse_test"] * 1.15
        assert valid_mse < self.results_mlp["mse_test"] * 1.15
Example #2
0
    def test_pytorch_model_country_as_categorical(self):
        """Train a PyTorch net on the country-as-categorical dataset and
        check its final validation MSE against the GBDT and MLP baselines."""
        experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
        dataset = Datasets.DATA_COUNTRY_CATEG

        model_spec, net = model_constructors.build_pytorch_net(
            feature_specs=experiment_params["features"],
            product_sets=experiment_params["product_sets"],
            float_feature_order=dataset["final_float_feature_order"],
            id_feature_order=dataset["final_id_feature_order"],
            reward_type=Params.ML_PARAMS["reward_type"],
            layers=self.model_params["layers"],
            activations=self.model_params["activations"],
            input_dim=train_bandit.num_float_dim(dataset),
        )

        trained = model_trainers.fit_custom_pytorch_module_w_skorch(
            reward_type=Params.ML_PARAMS["reward_type"],
            model=net,
            X=Datasets.X_COUNTRY_CATEG["X_train"],
            y=Datasets.X_COUNTRY_CATEG["y_train"],
            hyperparams=self.model_params,
        )

        # Validation loss recorded on the last training epoch.
        valid_mse = trained.history[-1]["valid_loss"]

        # MSE should beat, or come close to, the out-of-the-box GBDT & MLP.
        # The GBDT needs less training, so the tolerance is forgiving.
        assert valid_mse < self.results_gbdt["mse_test"] * 1.15
        assert valid_mse < self.results_mlp["mse_test"] * 1.15
Example #3
0
    def test_pytorch_model_country_as_categorical_binary_reward(self):
        """Train a binary-reward model on the country-as-categorical dataset
        and check its validation accuracy against the GBDT baseline."""
        reward_type = "binary"
        experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
        dataset = Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD

        model_spec, net = model_constructors.build_pytorch_net(
            feature_specs=experiment_params["features"],
            product_sets=experiment_params["product_sets"],
            float_feature_order=dataset["final_float_feature_order"],
            id_feature_order=dataset["final_id_feature_order"],
            reward_type=reward_type,
            layers=self.model_params["layers"],
            activations=self.model_params["activations"],
            input_dim=train_bandit.num_float_dim(dataset),
        )

        trained = model_trainers.fit_custom_pytorch_module_w_skorch(
            reward_type=reward_type,
            model=net,
            X=Datasets.X_COUNTRY_CATEG_BINARY_REWARD["X_train"],
            # .squeeze() flattens labels to 1-D — presumably the shape the
            # classification trainer expects; confirm against the trainer.
            y=Datasets.X_COUNTRY_CATEG_BINARY_REWARD["y_train"].squeeze(),
            hyperparams=self.model_params,
        )

        # Validation accuracy recorded on the last training epoch.
        valid_acc = trained.history[-1]["valid_acc"]

        # Accuracy should beat, or come close to, the out-of-the-box GBDT.
        # The GBDT needs less training, so the tolerance is forgiving.
        assert valid_acc > self.results_gbdt_classification["acc_test"] - 0.03
Example #4
0
    def test_pytorch_model_country_as_id_list_and_decision_as_id_list(self):
        """Train a net where both country and decision are id-list features
        and check its validation MSE against the GBDT and MLP baselines."""
        experiment_params = (
            Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AND_DECISION_AS_ID_LIST
        )
        dataset = Datasets.DATA_COUNTRY_AND_DECISION_ID_LIST

        model_spec, net = model_constructors.build_pytorch_net(
            feature_specs=experiment_params["features"],
            product_sets=experiment_params["product_sets"],
            float_feature_order=dataset["final_float_feature_order"],
            id_feature_order=dataset["final_id_feature_order"],
            reward_type=Params.ML_PARAMS["reward_type"],
            layers=self.model_params["layers"],
            activations=self.model_params["activations"],
            input_dim=train_bandit.num_float_dim(dataset),
        )

        trained = model_trainers.fit_custom_pytorch_module_w_skorch(
            reward_type=Params.ML_PARAMS["reward_type"],
            model=net,
            X=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["X_train"],
            y=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["y_train"],
            hyperparams=self.model_params,
        )

        # Validation loss recorded on the last training epoch.
        valid_mse = trained.history[-1]["valid_loss"]

        # MSE should beat, or come close to, the out-of-the-box GBDT & MLP.
        # The GBDT needs less training, so the tolerance is forgiving; this
        # model also learns two embedding tables, so it needs more training
        # time to be competitive.
        assert valid_mse < self.results_gbdt["mse_test"] * 1.15
        assert valid_mse < self.results_mlp["mse_test"] * 1.15
Example #5
0
    def test_mixture_density_networks_continuous(self):
        """Train a mixture density network (MDN) on the country-and-decision
        id-list dataset and sanity-check the MSE of its mean estimates.

        The index arithmetic below assumes `predict` returns batches of mean
        estimates interleaved with batches of variance estimates: for batch
        size b, rows in even-numbered batches ([0, b), [2b, 3b), ...) are
        means and rows in odd-numbered batches are variances — inferred from
        the original even/odd batch split; confirm against the MDN module.
        """
        experiment_params = (
            Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AND_DECISION_AS_ID_LIST
        )
        dataset = Datasets.DATA_COUNTRY_AND_DECISION_ID_LIST

        model_spec, pytorch_net = model_constructors.build_pytorch_net(
            feature_specs=experiment_params["features"],
            product_sets=experiment_params["product_sets"],
            float_feature_order=dataset["final_float_feature_order"],
            id_feature_order=dataset["final_id_feature_order"],
            reward_type=Params.ML_PARAMS["reward_type"],
            layers=self.model_params["layers"],
            activations=self.model_params["activations"],
            input_dim=train_bandit.num_float_dim(dataset),
            is_mdn=True,
        )

        skorch_net = model_trainers.fit_custom_pytorch_module_w_skorch(
            reward_type=Params.ML_PARAMS["reward_type"],
            model=pytorch_net,
            X=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["X_train"],
            y=Datasets.X_COUNTRY_AND_DECISION_ID_LIST["y_train"],
            hyperparams=self.model_params,
            model_name="mixture_density_network",
        )

        X0 = Datasets.X_COUNTRY_AND_DECISION_ID_LIST["X_train"]
        Y0 = Datasets.X_COUNTRY_AND_DECISION_ID_LIST["y_train"]
        preds = skorch_net.predict(X0)

        # Keep only rows from even-numbered batches (mean estimates); the
        # odd-numbered batches hold variance estimates and are unused here.
        # (The original also built the variance index list but never used it.)
        b_size = skorch_net.batch_size
        mu_est = [i for i in range(preds.shape[0]) if (i // b_size) % 2 == 0]

        mse = np.mean((preds[mu_est].flatten() - Y0.numpy().flatten()) ** 2)
        assert mse < 25
Example #6
0
    def test_same_predictions_country_as_categorical(self):
        """Serialize a trained predictor to disk, load it back, and verify
        the round-tripped predictor scores an observation identically."""
        raw_data = shuffle(Datasets._raw_data)
        test_input = raw_data.iloc[0]

        data = preprocessor.preprocess_data(
            raw_data,
            self.ml_params["data_reader"]["reward_function"],
            Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL,
            shuffle_data=False,  # keep row order so the same observation is re-tested
        )

        _X, _y = preprocessor.data_to_pytorch(data)

        offset = Datasets._offset
        X_COUNTRY_CATEG = {
            "X_train": {"X_float": _X["X_float"][:offset]},
            "y_train": _y[:offset],
            "X_test": {"X_float": _X["X_float"][offset:]},
            "y_test": _y[offset:],
        }

        experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
        dataset = Datasets.DATA_COUNTRY_CATEG

        net_spec, pytorch_net = train_bandit.build_pytorch_net(
            feature_specs=experiment_params["features"],
            product_sets=experiment_params["product_sets"],
            float_feature_order=dataset["final_float_feature_order"],
            id_feature_order=dataset["final_id_feature_order"],
            layers=self.ml_params["model"]["layers"],
            activations=self.ml_params["model"]["activations"],
            input_dim=train_bandit.num_float_dim(dataset),
        )

        pre_serialized_predictor = BanditPredictor(
            experiment_params=experiment_params,
            float_feature_order=dataset["float_feature_order"],
            id_feature_order=dataset["id_feature_order"],
            id_feature_str_to_int_map=dataset["id_feature_str_to_int_map"],
            transforms=dataset["transforms"],
            imputers=dataset["imputers"],
            net=pytorch_net,
            net_spec=net_spec,
        )

        # NOTE(review): the returned skorch wrapper is unused afterwards;
        # the predictor's net is presumably trained in place — confirm.
        train_bandit.fit_custom_pytorch_module_w_skorch(
            module=pre_serialized_predictor.net,
            X=X_COUNTRY_CATEG["X_train"],
            y=X_COUNTRY_CATEG["y_train"],
            hyperparams=self.ml_params,
        )

        pre_serialized_predictor.config_to_file(self.tmp_config_path)
        pre_serialized_predictor.net_to_file(self.tmp_net_path)

        post_serialized_predictor = BanditPredictor.predictor_from_file(
            self.tmp_config_path, self.tmp_net_path
        )

        context = json.loads(test_input.context)
        pre_pred = pre_serialized_predictor.predict(context)
        post_pred = post_serialized_predictor.predict(context)

        assert np.allclose(pre_pred["scores"], post_pred["scores"], self.tol)
        assert pre_pred["ids"] == post_pred["ids"]
Example #7
0
    def test_same_predictions_country_as_categorical_binary_reward(self):
        """Round-trip a binary-reward predictor through serialization and
        verify pre- and post-serialization predictions agree — for a real
        context, an empty context, and a context with only garbage keys."""
        reward_type = "binary"

        raw_data = shuffle(Datasets._raw_data_binary_reward)
        test_input = raw_data.iloc[0]

        data = preprocessor.preprocess_data(
            raw_data,
            Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL,
            reward_type,
            shuffle_data=False,  # keep row order so the same observation is re-tested
        )

        _X, _y = preprocessor.data_to_pytorch(data)

        offset = Datasets._offset_binary_reward
        X_COUNTRY_CATEG_BINARY_REWARD = {
            "X_train": {"X_float": _X["X_float"][:offset]},
            "y_train": _y[:offset],
            "X_test": {"X_float": _X["X_float"][offset:]},
            "y_test": _y[offset:],
        }

        experiment_params = Params.EXPERIMENT_SPECIFIC_PARAMS_COUNTRY_AS_CATEGORICAL
        dataset = Datasets.DATA_COUNTRY_CATEG_BINARY_REWARD

        model_spec, pytorch_net = model_constructors.build_pytorch_net(
            feature_specs=experiment_params["features"],
            product_sets=experiment_params["product_sets"],
            float_feature_order=dataset["final_float_feature_order"],
            id_feature_order=dataset["final_id_feature_order"],
            reward_type=reward_type,
            layers=self.model_params["layers"],
            activations=self.model_params["activations"],
            input_dim=train_bandit.num_float_dim(dataset),
        )

        pre_serialized_predictor = BanditPredictor(
            experiment_params=experiment_params,
            float_feature_order=dataset["float_feature_order"],
            id_feature_order=dataset["id_feature_order"],
            id_feature_str_to_int_map=dataset["id_feature_str_to_int_map"],
            transforms=dataset["transforms"],
            imputers=dataset["imputers"],
            model=pytorch_net,
            model_type=self.model_type,
            reward_type=reward_type,
            model_spec=model_spec,
        )

        # NOTE(review): the returned skorch wrapper is unused afterwards;
        # the predictor's model is presumably trained in place — confirm.
        model_trainers.fit_custom_pytorch_module_w_skorch(
            reward_type=reward_type,
            model=pre_serialized_predictor.model,
            X=X_COUNTRY_CATEG_BINARY_REWARD["X_train"],
            y=X_COUNTRY_CATEG_BINARY_REWARD["y_train"],
            hyperparams=self.model_params,
        )

        pre_serialized_predictor.config_to_file(self.tmp_config_path)
        pre_serialized_predictor.model_to_file(self.tmp_net_path)

        post_serialized_predictor = BanditPredictor.predictor_from_file(
            self.tmp_config_path, self.tmp_net_path
        )

        def assert_same_predictions(context):
            # Scores must match within tolerance and ids exactly.
            pre = pre_serialized_predictor.predict(context)
            post = post_serialized_predictor.predict(context)
            assert np.allclose(pre["scores"], post["scores"], self.tol)
            assert pre["ids"] == post["ids"]

        # Normal context taken from the held-out observation.
        assert_same_predictions(json.loads(test_input.context))

        # Context with all features missing.
        assert_same_predictions({})

        # Context containing only garbage feature keys.
        assert_same_predictions({"blah": 42})