示例#1
0
  def test_bundle_and_unbundle(self):
    """Checks that agent checkpoint state round-trips through unbundle().

    Also asserts the slate returned by step() has the configured length and
    in-range document indices — the original comment promised this check but
    the return value was discarded, so nothing was actually verified.
    """
    # Initialize agent with a single-slot slate over 3 candidate documents.
    slate_size = 1
    num_candidates = 3
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))

    user_model = ie.IEUserModel(
        slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse)
    agent = greedy_pctr_agent.GreedyPCTRAgent(action_space,
                                              user_model.avg_user_state)

    # Create a set of documents for the agent to rank.
    document_sampler = ie.IETopicDocumentSampler()
    documents = {}
    for i in range(num_candidates):
      video = document_sampler.sample_document()
      documents[i] = video.create_observation()

    # Test that slate indices are in the correct range and length is correct.
    observation = dict(user=user_model.create_observation(), doc=documents)
    slate = agent.step(1, observation)
    self.assertLen(slate, slate_size)
    for doc_index in slate:
      self.assertIn(doc_index, range(num_candidates))

    # Bundling, restoring, and re-bundling must yield an identical bundle.
    bundle_dict = agent.bundle_and_checkpoint('', 0)
    self.assertTrue(agent.unbundle('', 0, bundle_dict))
    self.assertEqual(bundle_dict, agent.bundle_and_checkpoint('', 0))
示例#2
0
 def test_bundle_and_unbundle_trivial(self):
   """Unbundling an empty dict fails; a fresh agent bundles to episode 0."""
   # A one-slot action space and no belief state give a minimal agent.
   trivial_space = spaces.MultiDiscrete(np.ones((1,)))
   agent = greedy_pctr_agent.GreedyPCTRAgent(trivial_space, None)
   # An empty checkpoint dict carries no state, so restoring must fail.
   self.assertFalse(agent.unbundle('', 0, {}))
   expected_bundle = {'episode_num': 0}
   self.assertEqual(expected_bundle, agent.bundle_and_checkpoint('', 0))
示例#3
0
  def test_step(self):
    """The agent's slate must match the top-scoring docs for the avg user."""
    # Build a small interest-evolution environment: 5 candidates, slate of 2.
    slate_size = 2
    num_candidates = 5
    action_space = spaces.MultiDiscrete(num_candidates * np.ones((slate_size,)))
    user_model = ie.IEUserModel(
        slate_size,
        user_state_ctor=ie.IEUserState,
        response_model_ctor=ie.IEResponse)

    # Seeded sampler so the candidate documents are deterministic.
    document_sampler = ie.IETopicDocumentSampler(seed=1)
    ieenv = environment.Environment(
        user_model,
        document_sampler,
        num_candidates,
        slate_size,
        resample_documents=True)

    agent = greedy_pctr_agent.GreedyPCTRAgent(action_space,
                                              user_model.avg_user_state)

    # The greedy agent ignores the previous user response, so a dummy
    # reward of 1 is fine here.
    observation, documents = ieenv.reset()
    slate = agent.step(1, dict(user=observation, doc=documents))

    # Recompute the per-document scores independently and take the best two.
    avg_state = user_model.avg_user_state
    scores = [avg_state.score_document(doc_obs)
              for doc_obs in documents.values()]
    expected_slate = sorted(np.argsort(scores)[-2:])
    self.assertAllEqual(sorted(slate), expected_slate)
示例#4
0
 def test_find_best_documents(self):
   """findBestDocuments picks the slate-size highest scores, best first."""
   # Slate of 4 picks drawn from 7 candidate scores.
   space = spaces.MultiDiscrete(4 * np.ones((4,)))
   agent = greedy_pctr_agent.GreedyPCTRAgent(space, None)
   candidate_scores = [-1, -2, 4.32, 0, 15, -6, 4.32]
   best = agent.findBestDocuments(candidate_scores)
   # 15, then the tied 4.32s (lower index 2 before 6), then 0 — descending.
   self.assertAllEqual(best, [4, 2, 6, 3])
示例#5
0
        'slate_size': slate_size,
        'resample_documents': True,
        'seed': 0,
        'reward_function': clicked_quality_reward
    }
    # User simulation environment: interest evolution model presented in the paper of SlateQ
    recsim_gym_env = interest_evolution.create_environment(env_config1)

    results_f = []

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        #agent = full_slate_q_agent.FullSlateQAgent(sess,
        #recsim_gym_env.observation_space, recsim_gym_env.action_space)

        agent = greedy_pctr_agent.GreedyPCTRAgent(
            sess, recsim_gym_env.observation_space,
            recsim_gym_env.action_space)
        #agent = cluster_bandit_agent.ClusterBanditAgent(sess,recsim_gym_env.observation_space,recsim_gym_env.action_space)

        for i in range(10):

            steps_f, watch, time_f, q, q_vid, w_vid = evalRun_one_episode(
                recsim_gym_env, agent, "fsq")
            results_f += [[i, steps_f, watch, time_f, q, q_vid, w_vid]]
            print("episode ", i)
            sess.run(tf.global_variables_initializer())

    episode_steps_f = []
    episode_ratio_watch_f = []
    episode_total_quality_f = []
    episodes_qvf = []