def test_nn_policy_learner_fit():
    """Check that ``NNPolicyLearner.fit`` rejects inconsistent ``context`` input.

    Two cases are exercised, each expected to raise ``ValueError``:

    * ``context`` has 101 rows while ``action``/``reward``/``pscore`` have 100.
    * ``context`` has 2 columns while the learner was built with
      ``dim_context=3``.
    """
    # NOTE(review): another function with this exact name is defined later in
    # this file; if both live in the same module the later definition replaces
    # this one and this test is never collected -- confirm which is intended.
    context = np.ones((100, 2), dtype=np.float32)
    action = np.zeros((100,), dtype=int)
    reward = np.ones((100,), dtype=np.float32)
    pscore = np.array([0.5] * 100, dtype=np.float32)
    ipw = InverseProbabilityWeighting()

    # inconsistency with the shape
    # The learner is built outside ``pytest.raises`` so that only a ValueError
    # coming from ``fit`` itself can satisfy the expectation.
    learner = NNPolicyLearner(
        n_actions=2,
        dim_context=2,
        off_policy_objective=ipw.estimate_policy_value_tensor,
    )
    variant_context = np.ones((101, 2), dtype=np.float32)
    with pytest.raises(ValueError):
        learner.fit(
            context=variant_context, action=action, reward=reward, pscore=pscore
        )

    # inconsistency between dim_context and context
    learner = NNPolicyLearner(
        n_actions=2,
        dim_context=3,
        off_policy_objective=ipw.estimate_policy_value_tensor,
    )
    with pytest.raises(ValueError):
        learner.fit(context=context, action=action, reward=reward, pscore=pscore)
def test_nn_policy_learner_fit():
    """Check that ``NNPolicyLearner.fit`` rejects inconsistent ``context`` input.

    Two cases are exercised, each expected to raise ``ValueError`` with a
    message containing the given description:

    * ``context`` has 101 rows while ``action``/``reward``/``pscore`` have 100.
    * ``context`` has 2 columns while the learner was built with
      ``dim_context=3``.
    """
    # Local import: the file's top-level import block is not visible here.
    import re

    context = np.ones((100, 2), dtype=np.float32)
    action = np.zeros((100,), dtype=int)
    reward = np.ones((100,), dtype=np.float32)
    pscore = np.array([0.5] * 100, dtype=np.float32)

    # inconsistency with the shape
    # ``match`` is a regular expression; the description contains ``.`` and
    # ``[0]``, so it is escaped to be matched literally. Unescaped, ``[0]*``
    # matches zero characters and the index in the message is never checked.
    desc = "Expected `context.shape[0]"
    # The learner is built outside ``pytest.raises`` so that only a ValueError
    # coming from ``fit`` itself can satisfy the expectation.
    learner = NNPolicyLearner(n_actions=2, dim_context=2, off_policy_objective="ipw")
    variant_context = np.ones((101, 2), dtype=np.float32)
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.fit(
            context=variant_context, action=action, reward=reward, pscore=pscore
        )

    # inconsistency between dim_context and context
    desc = "Expected `context.shape[1]"
    learner = NNPolicyLearner(n_actions=2, dim_context=3, off_policy_objective="ipw")
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.fit(context=context, action=action, reward=reward, pscore=pscore)
def test_nn_policy_learner_init_using_invalid_inputs(
    n_actions,
    len_list,
    dim_context,
    off_policy_objective,
    lambda_,
    policy_reg_param,
    var_reg_param,
    hidden_layer_size,
    activation,
    solver,
    alpha,
    batch_size,
    learning_rate_init,
    max_iter,
    shuffle,
    random_state,
    tol,
    momentum,
    nesterovs_momentum,
    early_stopping,
    validation_fraction,
    beta_1,
    beta_2,
    epsilon,
    n_iter_no_change,
    err,
    description,
):
    """Expect ``NNPolicyLearner`` construction to fail for an invalid argument set.

    Every argument except ``err`` and ``description`` is forwarded by keyword
    to ``NNPolicyLearner``. Construction must raise ``err`` with a message
    matched by the pattern built from ``description``.

    The arguments are presumably supplied by a ``pytest.mark.parametrize``
    decorator that is not visible in this view.
    """
    constructor_kwargs = {
        "n_actions": n_actions,
        "len_list": len_list,
        "dim_context": dim_context,
        "off_policy_objective": off_policy_objective,
        "lambda_": lambda_,
        "policy_reg_param": policy_reg_param,
        "var_reg_param": var_reg_param,
        "hidden_layer_size": hidden_layer_size,
        "activation": activation,
        "solver": solver,
        "alpha": alpha,
        "batch_size": batch_size,
        "learning_rate_init": learning_rate_init,
        "max_iter": max_iter,
        "shuffle": shuffle,
        "random_state": random_state,
        "tol": tol,
        "momentum": momentum,
        "nesterovs_momentum": nesterovs_momentum,
        "early_stopping": early_stopping,
        "validation_fraction": validation_fraction,
        "beta_1": beta_1,
        "beta_2": beta_2,
        "epsilon": epsilon,
        "n_iter_no_change": n_iter_no_change,
    }

    # ``match`` is interpreted as a regular expression, so ``description`` is
    # used as a pattern here (with a trailing ``*`` quantifier appended).
    with pytest.raises(err, match=f"{description}*"):
        NNPolicyLearner(**constructor_kwargs)
def test_nn_policy_learner_init_using_valid_inputs(
    n_actions,
    len_list,
    dim_context,
    off_policy_objective,
    hidden_layer_size,
    activation,
    solver,
    alpha,
    batch_size,
    learning_rate_init,
    max_iter,
    shuffle,
    random_state,
    tol,
    momentum,
    nesterovs_momentum,
    early_stopping,
    validation_fraction,
    beta_1,
    beta_2,
    epsilon,
    n_iter_no_change,
    max_fun,
    description,
):
    """Expect ``NNPolicyLearner`` construction to succeed for a valid argument set.

    Every argument except ``description`` is forwarded by keyword to
    ``NNPolicyLearner``; ``description`` is accepted but not used in the body.

    The arguments are presumably supplied by a ``pytest.mark.parametrize``
    decorator that is not visible in this view.
    """
    constructor_kwargs = {
        "n_actions": n_actions,
        "len_list": len_list,
        "dim_context": dim_context,
        "off_policy_objective": off_policy_objective,
        "hidden_layer_size": hidden_layer_size,
        "activation": activation,
        "solver": solver,
        "alpha": alpha,
        "batch_size": batch_size,
        "learning_rate_init": learning_rate_init,
        "max_iter": max_iter,
        "shuffle": shuffle,
        "random_state": random_state,
        "tol": tol,
        "momentum": momentum,
        "nesterovs_momentum": nesterovs_momentum,
        "early_stopping": early_stopping,
        "validation_fraction": validation_fraction,
        "beta_1": beta_1,
        "beta_2": beta_2,
        "epsilon": epsilon,
        "n_iter_no_change": n_iter_no_change,
        "max_fun": max_fun,
    }

    built = NNPolicyLearner(**constructor_kwargs)
    assert isinstance(built, NNPolicyLearner)
def test_nn_policy_learner_create_train_data_for_opl():
    """Check the loaders returned by ``_create_train_data_for_opl``.

    With the learner's default settings the method must return a
    ``DataLoader`` for training and ``None`` for validation; with
    ``early_stopping=True`` both returned objects must be ``DataLoader``
    instances.
    """
    n_rounds = 100
    feedback = {
        "context": np.ones((n_rounds, 2), dtype=np.int32),
        "action": np.zeros((n_rounds,), dtype=np.int32),
        "reward": np.ones((n_rounds,), dtype=np.float32),
        "pscore": np.array([0.5] * n_rounds, dtype=np.float32),
        "estimated_rewards_by_reg_model": np.ones((n_rounds, 2), dtype=np.float32),
        "position": np.zeros((n_rounds,), dtype=np.int32),
    }
    objective = InverseProbabilityWeighting().estimate_policy_value_tensor
    shared_kwargs = {
        "n_actions": 2,
        "dim_context": 2,
        "off_policy_objective": objective,
    }
    loader_type = torch.utils.data.DataLoader

    # Default configuration: no validation loader is expected.
    default_learner = NNPolicyLearner(**shared_kwargs)
    train_part, valid_part = default_learner._create_train_data_for_opl(**feedback)
    assert isinstance(train_part, loader_type)
    assert valid_part is None

    # Early stopping enabled: a validation loader is expected as well.
    early_stop_learner = NNPolicyLearner(**shared_kwargs, early_stopping=True)
    train_part, valid_part = early_stop_learner._create_train_data_for_opl(
        **feedback
    )
    assert isinstance(train_part, loader_type)
    assert isinstance(valid_part, loader_type)
def test_nn_policy_learner_predict_proba():
    """Check ``NNPolicyLearner.predict_proba`` input validation and output shape.

    Three cases are exercised, each on a freshly fitted learner:

    * a 1-D ``context`` must raise ``ValueError``;
    * a ``context`` with 3 columns (learner built with ``dim_context=2``)
      must raise ``ValueError``;
    * a valid ``(5, 2)`` ``context`` must yield an array of shape
      ``(5, n_actions, len_list)`` whose sum over axis 1 is all ones.
    """
    # NOTE(review): another function with this exact name is defined later in
    # this file; if both live in the same module the later definition replaces
    # this one and this test is never collected -- confirm which is intended.
    n_actions = 2
    len_list = 1
    context = np.ones((100, 2), dtype=np.float32)
    context_test = np.arange(10, dtype=np.float32).reshape(5, 2)
    action = np.zeros((100,), dtype=int)
    reward = np.ones((100,), dtype=np.float32)
    pscore = np.array([0.5] * 100, dtype=np.float32)
    ipw = InverseProbabilityWeighting()

    def _fitted_learner():
        # Build and fit a fresh learner on the valid training data above.
        learner = NNPolicyLearner(
            n_actions=n_actions,
            len_list=len_list,
            dim_context=2,
            off_policy_objective=ipw.estimate_policy_value_tensor,
        )
        learner.fit(context=context, action=action, reward=reward, pscore=pscore)
        return learner

    # shape error
    # Set-up happens outside ``pytest.raises`` so that only a ValueError coming
    # from ``predict_proba`` can satisfy the expectation.
    learner = _fitted_learner()
    invalid_context = np.array([1.0, 1.0], dtype=np.float32)
    with pytest.raises(ValueError):
        learner.predict_proba(context=invalid_context)

    # inconsistency between dim_context and context
    learner = _fitted_learner()
    invalid_context = np.array([[1.0, 1.0, 1.0]], dtype=np.float32)
    with pytest.raises(ValueError):
        learner.predict_proba(context=invalid_context)

    learner = _fitted_learner()
    action_dist = learner.predict_proba(context=context_test)
    # ``np.ones`` builds an array of the intended shape; ``np.ones_like`` on
    # the shape tuple would instead give a length-2 array that only compares
    # equal through broadcasting.
    assert np.allclose(
        action_dist.sum(1), np.ones((context_test.shape[0], len_list))
    )
    assert action_dist.shape[0] == context_test.shape[0]
    assert action_dist.shape[1] == n_actions
    assert action_dist.shape[2] == len_list
def test_nn_policy_learner_predict_proba():
    """Check ``NNPolicyLearner.predict_proba`` input validation and output shape.

    Three cases are exercised, each on a freshly fitted learner:

    * a 1-D ``context`` must raise ``ValueError`` mentioning that ``context``
      must be a 2D array;
    * a ``context`` with 3 columns (learner built with ``dim_context=2``)
      must raise ``ValueError`` mentioning ``context.shape[1]``;
    * a valid ``(5, 2)`` ``context`` must yield an array of shape
      ``(5, n_actions, len_list)`` whose sum over axis 1 is all ones.
    """
    # Local import: the file's top-level import block is not visible here.
    import re

    n_actions = 2
    len_list = 1
    context = np.ones((100, 2), dtype=np.float32)
    context_test = np.arange(10, dtype=np.float32).reshape(5, 2)
    action = np.zeros((100,), dtype=int)
    reward = np.ones((100,), dtype=np.float32)
    pscore = np.array([0.5] * 100, dtype=np.float32)

    def _fitted_learner():
        # Build and fit a fresh learner on the valid training data above.
        learner = NNPolicyLearner(
            n_actions=n_actions,
            len_list=len_list,
            dim_context=2,
            off_policy_objective="ipw",
        )
        learner.fit(context=context, action=action, reward=reward, pscore=pscore)
        return learner

    # shape error
    # Set-up happens outside ``pytest.raises`` so that only a ValueError coming
    # from ``predict_proba`` can satisfy the expectation.
    desc = "`context` must be 2D array"
    learner = _fitted_learner()
    invalid_context = np.array([1.0, 1.0], dtype=np.float32)
    # ``match`` is a regular expression, so the literal description is escaped.
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.predict_proba(context=invalid_context)

    # inconsistency between dim_context and context
    # Unescaped, ``[1]*`` would match zero characters and the index in the
    # message would never be checked.
    desc = "Expected `context.shape[1]"
    learner = _fitted_learner()
    invalid_context = np.array([[1.0, 1.0, 1.0]], dtype=np.float32)
    with pytest.raises(ValueError, match=re.escape(desc)):
        learner.predict_proba(context=invalid_context)

    learner = _fitted_learner()
    action_dist = learner.predict_proba(context=context_test)
    # ``np.ones`` builds an array of the intended shape; ``np.ones_like`` on
    # the shape tuple would instead give a length-2 array that only compares
    # equal through broadcasting.
    assert np.allclose(
        action_dist.sum(1), np.ones((context_test.shape[0], len_list))
    )
    assert action_dist.shape[0] == context_test.shape[0]
    assert action_dist.shape[1] == n_actions
    assert action_dist.shape[2] == len_list