def test_bundle_and_unbundle(self):
    """Checkpoint state survives a round trip through unbundle."""
    # A one-slot agent choosing among three candidate documents.
    slate_size = 1
    num_candidates = 3
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
    user_model = ie.IEUserModel(
        slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse)
    agent = greedy_pctr_agent.GreedyPCTRAgent(action_space,
                                              user_model.avg_user_state)

    # Sample one observation per candidate document.
    document_sampler = ie.IETopicDocumentSampler()
    documents = {
        doc_id: document_sampler.sample_document().create_observation()
        for doc_id in range(num_candidates)
    }

    # Take one step so the agent has state worth checkpointing.
    observation = dict(user=user_model.create_observation(), doc=documents)
    agent.step(1, observation)

    # Bundle, restore, and verify the re-bundled state is unchanged.
    bundle_dict = agent.bundle_and_checkpoint('', 0)
    self.assertTrue(agent.unbundle('', 0, bundle_dict))
    self.assertEqual(bundle_dict, agent.bundle_and_checkpoint('', 0))
def test_bundle_and_unbundle_trivial(self):
    """Unbundling an empty dict fails; a fresh agent checkpoints episode 0."""
    agent = greedy_pctr_agent.GreedyPCTRAgent(
        spaces.MultiDiscrete(np.ones((1,))), None)
    # An empty bundle carries no state, so unbundle must report failure.
    self.assertFalse(agent.unbundle('', 0, {}))
    # The trivial checkpoint contains only the episode counter.
    self.assertEqual({'episode_num': 0}, agent.bundle_and_checkpoint('', 0))
def test_step(self):
    """The agent's slate is the top-scoring documents under the avg user."""
    slate_size = 2
    num_candidates = 5
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
    user_model = ie.IEUserModel(
        slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse)

    # Seeded sampler so the candidate pool is reproducible.
    document_sampler = ie.IETopicDocumentSampler(seed=1)
    ieenv = environment.Environment(
        user_model,
        document_sampler,
        num_candidates,
        slate_size,
        resample_documents=True)

    agent = greedy_pctr_agent.GreedyPCTRAgent(action_space,
                                              user_model.avg_user_state)

    # This agent ignores the previous user response, so any reward works.
    observation, documents = ieenv.reset()
    slate = agent.step(1, dict(user=observation, doc=documents))

    # Score every candidate with the same state the agent was given.
    scores = [
        user_model.avg_user_state.score_document(doc_obs)
        for doc_obs in list(documents.values())
    ]
    # Greedy choice == indices of the two highest scores.
    expected_slate = sorted(np.argsort(scores)[-2:])
    self.assertAllEqual(sorted(slate), expected_slate)
def test_find_best_documents(self):
    """findBestDocuments returns indices of the top scores, best first."""
    agent = greedy_pctr_agent.GreedyPCTRAgent(
        spaces.MultiDiscrete(4 * np.ones((4,))), None)
    # Includes negatives and a duplicate (4.32) to exercise ordering.
    scores = [-1, -2, 4.32, 0, 15, -6, 4.32]
    ranked = agent.findBestDocuments(scores)
    # Top four scores are 15, 4.32, 4.32, 0 at indices 4, 2, 6, 3.
    self.assertAllEqual(ranked, [4, 2, 6, 3])
'slate_size': slate_size, 'resample_documents': True, 'seed': 0, 'reward_function': clicked_quality_reward } # User simulation environment: interest evolution model presented in the paper of SlateQ recsim_gym_env = interest_evolution.create_environment(env_config1) results_f = [] with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: #agent = full_slate_q_agent.FullSlateQAgent(sess, #recsim_gym_env.observation_space, recsim_gym_env.action_space) agent = greedy_pctr_agent.GreedyPCTRAgent( sess, recsim_gym_env.observation_space, recsim_gym_env.action_space) #agent = cluster_bandit_agent.ClusterBanditAgent(sess,recsim_gym_env.observation_space,recsim_gym_env.action_space) for i in range(10): steps_f, watch, time_f, q, q_vid, w_vid = evalRun_one_episode( recsim_gym_env, agent, "fsq") results_f += [[i, steps_f, watch, time_f, q, q_vid, w_vid]] print("episode ", i) sess.run(tf.global_variables_initializer()) episode_steps_f = [] episode_ratio_watch_f = [] episode_total_quality_f = [] episodes_qvf = []