def cb(outputs):
    """Sample an action from one of two predicted distributions and reply.

    ``outputs`` is a future-like object: ``outputs.result()`` must yield a
    4-tuple ``(distrib1, distrib2, value1, value2)`` or raise
    ``CancelledError``.  Both distributions are used as probability vectors
    for ``np.random.choice``.

    ``self``, ``ident``, ``state`` and ``logger`` are free variables taken
    from the enclosing scope (not visible in this block).

    On cancellation the event is logged and nothing else happens.  Otherwise
    a ``TransitionExperience`` is appended to the client's memory and
    ``[ident, dumps(action)]`` is put on ``self.send_queue``.
    """
    try:
        distrib1, distrib2, value1, value2 = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(ident))
        return

    # Sanity-check both probability vectors before any random draw happens.
    for probs in (distrib1, distrib2):
        assert np.all(np.isfinite(probs)), probs

    # Coin flip decides which distribution the action is sampled from.
    picked_first = np.random.rand() < 0.5
    sampling_probs = distrib1 if picked_first else distrib2
    action = np.random.choice(len(sampling_probs), p=sampling_probs)

    # Weight 1.0 marks the distribution that produced the action, 0.0 the other.
    if picked_first:
        weight_first, weight_second = 1.0, 0.0
    else:
        weight_first, weight_second = 0.0, 1.0

    memory = self.clients[ident].memory
    memory.append(
        TransitionExperience(
            state,
            action,
            reward=None,
            value1=value1,
            value2=value2,
            updateweight1=weight_first,
            updateweight2=weight_second,
            # Probability of the chosen action is recorded under *both*
            # distributions, regardless of which one was sampled.
            prob1=distrib1[action],
            prob2=distrib2[action],
        )
    )
    self.send_queue.put([ident, dumps(action)])
def cb(outputs):
    """Sample an action from the predicted distribution and reply to the client.

    ``outputs`` is a future-like object: ``outputs.result()`` must yield a
    pair ``(distrib, value)`` or raise ``CancelledError``.  ``distrib`` is
    used as the probability vector for ``np.random.choice``.

    ``self``, ``client``, ``state`` and ``logger`` are free variables taken
    from the enclosing scope (not visible in this block).

    On cancellation the event is logged and nothing else happens.  Otherwise
    a ``TransitionExperience`` is appended to ``client.memory`` and
    ``[client.ident, dumps(action)]`` is put on ``self.send_queue``.
    """
    try:
        action_probs, state_value = outputs.result()
    except CancelledError:
        logger.info("Client {} cancelled.".format(client.ident))
        return

    # Sanity-check the probability vector before sampling from it.
    assert np.all(np.isfinite(action_probs)), action_probs

    num_actions = len(action_probs)
    action = np.random.choice(num_actions, p=action_probs)

    memory = client.memory
    memory.append(
        TransitionExperience(
            state,
            action,
            reward=None,  # no reward is known at this point; None is stored
            value=state_value,
            prob=action_probs[action],
        )
    )

    reply = [client.ident, dumps(action)]
    self.send_queue.put(reply)