Example #1
File: tnn.py  Project: tknandu/HRL_PCCA-
import random
import numpy as np

# NeuralNet and T_Experience are defined elsewhere in this project (not shown here).

class TNN():
    """
    Transition Neural Network. Includes experience replay (interesting to see how it works for the transition network)!

    The output layer has 1 node: the transition probability.
    input_size: the number of inputs (2 * the size of the state representation)
    max_experiences: the total number of experiences to save for replay
    alpha: learning rate for the underlying NN
    """
    
    def __call__(self, s1, s2):
        """ Return T(s1, s2); T(s1, s2) is equivalent to T.GetValue(s1, s2).
        """
        return self.GetValue(s1, s2)

    def __init__(self, input_size, max_experiences=500, alpha=0.1):
        # lay = [input_size, int((nactions+input_size)/2.0), nactions]
        lay = [input_size, int((1+input_size)/2.0), 1]
        self.NN = NeuralNet(layers=lay, epsilon=0.154, learningRate=alpha)
        self.experiences = []
        self.max_experiences = max_experiences
        self.prob_remember = 0.1
        self.num_replay_samples = 10

    def GetValue(self, s1, s2):
        """ Return T(s1, s2): the predicted transition probability from s1 to s2.
        """
        out = self.NN.propagate(np.concatenate((s1, s2)))
        return out[0]

    def Update(self, s1, s2, t):
        """ Update the network toward the target transition probability t for the pair (s1, s2).
        """
        target = np.zeros(1)
        target[0] = t
        self.NN.propagateAndUpdate(np.concatenate((s1, s2)), target)

    def RememberExperience(self, s1, s2, t):
        if (random.random() < self.prob_remember):
            if (len(self.experiences) >= self.max_experiences):
                #TODO: Something more intelligent about how we determine what is worth forgetting
                self.experiences.pop(random.randint(0, self.max_experiences-1))
            self.experiences.append(T_Experience(s1, s2, t))

    def ExperienceReplay(self):
        #Skip until we have enough experience
        if (len(self.experiences) < self.num_replay_samples):
            return
        for i in xrange(self.num_replay_samples):
            index = random.randint(0, len(self.experiences)-1)
            exp = self.experiences[index]
            self.Update(exp.s1, exp.s2, exp.t)
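
Below is a rough usage sketch for the TNN class above, not taken from the project: the state dimensionality, the random state vectors, and the constant target t are illustrative placeholders, and it assumes NeuralNet and T_Experience are available as used in the class.

# Hypothetical usage sketch for TNN; state_dim and the sampled states are placeholders.
import numpy as np

state_dim = 4
tnn = TNN(input_size=2 * state_dim)        # inputs are the concatenated (s1, s2) pair

for _ in range(1000):
    s1 = np.random.rand(state_dim)         # placeholder states; a real agent would
    s2 = np.random.rand(state_dim)         # feed observed transitions here
    t = 1.0                                # illustrative target transition probability
    tnn.Update(s1, s2, t)                  # direct online update
    tnn.RememberExperience(s1, s2, t)      # stored with probability prob_remember
    tnn.ExperienceReplay()                 # replays num_replay_samples stored pairs
print(tnn(s1, s2))                         # __call__ delegates to GetValue(s1, s2)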
Example #2
import random
import numpy as np

# NeuralNet and Experience are defined elsewhere in this project (not shown here).

class Deep_QNN():
    """
    Neural Q-Network. Includes experience replay!
    nactions: the number of actions
    input_size: the number of inputs
    max_experiences: the total number of experiences to save for replay
    gamma: future rewards discount rate
    alpha: learning rate for the underlying NN
    use_sarsa: flag whether to use the SARSA update rule
    """
    def __call__(self, s, a=None):
        """ Return the Q-value of state (s) and action (a);
        e.g. Q.GetValue(s, a) is equivalent to Q(s, a).
        """
        if a is None:
            return self.GetValue(s)
        return self.GetValue(s, a)

    def __init__(self, nactions, input_size, max_experiences=500, gamma=0.6, alpha=0.1, use_sarsa=False):
        # lay = [input_size, int((nactions+input_size)/2.0), nactions]
        lay = [input_size, int((nactions+input_size)/2.0), 1, nactions]
        self.nactions = nactions
        self.NN = NeuralNet(layers=lay, epsilon=0.154, learningRate=alpha)
        self.experiences = []
        self.max_experiences = max_experiences
        self.gamma = gamma
        self.use_sarsa = use_sarsa
        self.prob_remember = 0.1
        self.num_replay_samples = 10

    def GetValue(self, s, a=None):
        """ Return the Q(s,a) value of state (s) for action (a),
        or all action values Q(s) when a is None.
        """
        out = self.NN.propagate(s)
        if a is None:
            return out
        return out[a]

    def Update(self, s1, a1, r, s2, a2):
        """ Update the value of action (a1) taken in state (s1),
        given reward (r), successor state (s2), and next action (a2) for SARSA.
        """
        if (self.use_sarsa):
            # SARSA: on-policy target uses the action actually taken next
            v = r + self.gamma*self.GetValue(s2, a2)
        else:
            # Q-learning: off-policy target uses the greedy value of s2
            v = r + self.gamma*max(self.GetValue(s2))
        target = np.zeros(self.nactions)
        target[a1] = v
        self.NN.propagateAndUpdate(s1, target)

    def RememberExperience(self, s1, a1, r, s2, a2):
        if (random.random() < self.prob_remember):
            if (len(self.experiences) >= self.max_experiences):
                #TODO: Something more intelligent about how we determine what is worth forgetting
                self.experiences.pop(random.randint(0, self.max_experiences-1))
            self.experiences.append(Experience(s1, a1, r, s2, a2))

    def ExperienceReplay(self):
        #Skip until we have enough experience
        if (len(self.experiences) < self.num_replay_samples):
            return
        for i in xrange(self.num_replay_samples):
            index = random.randint(0, len(self.experiences)-1)
            exp = self.experiences[index]
            self.Update(exp.s1, exp.a1, exp.r, exp.s2, exp.a2)

    def getHiddenLayerRepresentation(self, state):
        """ Return the hidden-layer representation of the given state from the underlying network.
        """
        return self.NN.getHiddenLayerRepresentation(state)
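
Below is a rough sketch of how Deep_QNN might be driven by an agent loop; the epsilon_greedy helper, the environment stand-ins, and all constants are hypothetical and only illustrate the call pattern of the methods above.

# Hypothetical agent loop for Deep_QNN; environment interaction is faked with random vectors.
import random
import numpy as np

def epsilon_greedy(q, s, nactions, epsilon=0.1):
    # Explore with probability epsilon, otherwise act greedily on Q(s).
    if random.random() < epsilon:
        return random.randint(0, nactions - 1)
    return int(np.argmax(q(s)))

nactions, state_dim = 4, 8
Q = Deep_QNN(nactions, state_dim, use_sarsa=True)

s1 = np.random.rand(state_dim)             # stand-in for an initial observation
a1 = epsilon_greedy(Q, s1, nactions)
for _ in range(100):
    s2 = np.random.rand(state_dim)         # stand-in for the next observation
    r = 0.0                                # stand-in for the environment reward
    a2 = epsilon_greedy(Q, s2, nactions)   # next action (needed for the SARSA target)
    Q.Update(s1, a1, r, s2, a2)
    Q.RememberExperience(s1, a1, r, s2, a2)
    Q.ExperienceReplay()
    s1, a1 = s2, a2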