示例#1
0
 def cb(outputs):
     """Handle the prediction result for one client state.

     ``outputs`` is a future-like object whose ``result()`` yields two action
     distributions followed by two accompanying values. One of the two
     distributions is chosen with equal probability and an action is sampled
     from it. The transition is then appended to the client's memory and
     ``[ident, dumps(action)]`` is put on the send queue. If the future was
     cancelled, the event is logged and nothing else happens.
     """
     try:
         policy_a, policy_b, baseline_a, baseline_b = outputs.result()
     except CancelledError:
         logger.info("Client {} cancelled.".format(ident))
         return

     # Sanity-check both distributions before sampling from either one.
     for policy in (policy_a, policy_b):
         assert np.all(np.isfinite(policy)), policy

     # A single uniform draw decides which distribution drives this step;
     # the chosen one gets update weight 1.0 and the other gets 0.0.
     use_first = np.random.rand() < 0.5
     behaviour = policy_a if use_first else policy_b
     weight_a, weight_b = (1.0, 0.0) if use_first else (0.0, 1.0)
     action = np.random.choice(len(behaviour), p=behaviour)

     # Both distributions' probabilities of the sampled action are stored,
     # regardless of which one the action was drawn from.
     remember = self.clients[ident].memory.append
     remember(
         TransitionExperience(
             state,
             action,
             reward=None,
             value1=baseline_a,
             value2=baseline_b,
             updateweight1=weight_a,
             updateweight2=weight_b,
             prob1=policy_a[action],
             prob2=policy_b[action],
         )
     )

     reply = [ident, dumps(action)]
     self.send_queue.put(reply)
示例#2
0
 def cb(outputs):
     """Handle the prediction result for one client state.

     ``outputs`` is a future-like object whose ``result()`` yields an action
     distribution and an accompanying value. An action is sampled from the
     distribution, the transition is appended to the client's memory, and
     ``[client.ident, dumps(action)]`` is put on the send queue. If the
     future was cancelled, the event is logged and nothing else happens.
     """
     try:
         policy, baseline = outputs.result()
     except CancelledError:
         logger.info("Client {} cancelled.".format(client.ident))
         return

     # Sanity-check the distribution before sampling from it.
     assert np.all(np.isfinite(policy)), policy

     num_actions = len(policy)
     action = np.random.choice(num_actions, p=policy)

     # The reward is not known yet, so the experience is stored with None.
     remember = client.memory.append
     remember(
         TransitionExperience(
             state,
             action,
             reward=None,
             value=baseline,
             prob=policy[action],
         )
     )

     put_reply = self.send_queue.put
     put_reply([client.ident, dumps(action)])