def transform_trlist_to_data(trList):
    """
    Turn a list of (time step, reward) pairs into a column-style dict.

    The pairs are split with ``unzip`` into a sequence of time steps and a
    sequence of rewards (presumably the inverse of ``zip`` -- confirm against
    the helper's definition).

    Returns a dict with two keys:
      * ``"rewards"`` -- the rewards copied into a new list
      * ``"time"``    -- the time steps exactly as ``unzip`` returned them
    """
    steps, gains = unzip(trList)
    return {"rewards": list(gains), "time": steps}
def learn(self, actionRewardTupleList):
    """
    Learn from a list of (action, reward) tuples.

    The tuples are separated into an action sequence and a reward sequence
    via ``utils.unzip``; both are then handed to ``self.Q.update``, with the
    actions as ``indexList`` and the rewards as ``newValueOrList``.
    """
    chosen, observed = utils.unzip(actionRewardTupleList)
    self.Q.update(indexList=chosen, newValueOrList=observed)
def test_unzip():
    """Check that ``utils.unzip`` recovers the two lists passed to ``zip``."""
    firsts = [1, 2, 3]
    seconds = [4, 5, 6]

    # Zip the two lists together, then immediately take them apart again.
    got_firsts, got_seconds = utils.unzip(zip(firsts, seconds))

    assert got_firsts == firsts, "Unzip 1"
    assert got_seconds == seconds, "Unzip 2"
def learn(self, actionRewardTupleList):
    """
    Learn from a list of (action, reward) tuples, then decay epsilon.

    The tuples are separated into an action sequence and a reward sequence
    via ``utils.unzip``; both are passed to ``self.Q.update`` (actions as
    ``indexList``, rewards as ``newValueOrList``). Afterwards
    ``self.decay_epsilon()`` is called exactly once per invocation,
    regardless of how many tuples were supplied.
    """
    picked, payoffs = utils.unzip(actionRewardTupleList)
    self.Q.update(indexList=picked, newValueOrList=payoffs)
    # TODO Add multiple decays
    self.decay_epsilon()
def transform_arlist_to_data(arList):
    """
    Turn a list of (action, reward) pairs into a column-style dict.

    The pairs are split with ``unzip`` into a sequence of actions and a
    sequence of rewards (presumably the inverse of ``zip`` -- confirm against
    the helper's definition). A running index 0..N-1 is generated as the
    time axis, where N is the number of actions.

    Returns a dict with three equally long lists:
      * ``"actions"`` -- the actions, copied into a new list
      * ``"rewards"`` -- the rewards, copied into a new list
      * ``"time"``    -- the integers ``0, 1, ..., N-1``
    """
    actions, rewards = unzip(arList)
    # Materialise the index: on Python 3 a bare ``range`` is a lazy object,
    # which would make "time" a different type from the other two columns
    # (and, unlike a list, it cannot be JSON-serialised).
    timeSteps = list(range(len(actions)))
    return {
        "actions": list(actions),
        "rewards": list(rewards),
        "time": timeSteps,
    }