Example #1
def train_and_save_SAR_MDP(track_str, starts, gamma):
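    """Build a Track and MDP for track_str, train it with SARSA, and pickle the trained MDP."""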
    test_track = Track(track_str + '.txt')
    test_mdp = MDP(test_track, algorithm='SARSA')
    SARSA.sarsa(test_mdp, starts, gamma=gamma)
    with open(f'pickles/{track_str}_{gamma}_SAR_pickle', 'wb') as file:
        pickle.dump(test_mdp, file)
    print('Training Complete')
Example #2
 def __init__(self, alpha, epsilon, gamma, actionList, size):
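     """Store the grid size and create one SARSA learner per feature view (all, world, turtle, coin)."""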
     self.size = self.width, self.height = size
     self.actionList = actionList
     self.epsilon = epsilon
     self.agent = [
         SARSA.SARSA(alpha, epsilon, gamma, actionList),  #all
         SARSA.SARSA(alpha, epsilon, gamma, actionList),  #world
         SARSA.SARSA(alpha, epsilon, gamma, actionList),  #turtle
         SARSA.SARSA(alpha, epsilon, gamma, actionList),  #coin
     ]
Example #3
class LSPI_SARSA(Agent):
    def __init__(self, representation, policy, domain, logger, lspi_iterations=5,
                 sample_window=100, epsilon=1e-3, re_iterations=100, initial_alpha=.1,
                 lambda_=0, alpha_decay_mode='dabney', boyan_N0=1000):
        self.SARSA = SARSA(representation, policy, domain, logger, initial_alpha,
                           lambda_, alpha_decay_mode, boyan_N0)
        self.LSPI = LSPI(representation, policy, domain, logger, lspi_iterations,
                         sample_window, epsilon, re_iterations)
        super(LSPI_SARSA, self).__init__(representation, policy, domain, logger)
    def learn(self,s,a,r,ns,na,terminal):
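        # LSPI buffers every transition; SARSA performs the online update between LSPI representation expansions.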
        self.LSPI.process(s,a,r,ns,na,terminal)        
        if (self.LSPI.samples_count + 1) % self.LSPI.steps_between_LSPI == 0:
            self.LSPI.representationExpansionLSPI()
            if terminal:
                self.episodeTerminated()
        else:
            self.SARSA.learn(s,a,r,ns,na,terminal)
Example #4
def read_argument():
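    """Parse reward, trial-count, and epsilon arguments, then build the Environment and SARSA agent."""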
    parser = argparse.ArgumentParser(description='Reinforcement Learning')

    parser.add_argument('goal_state_reward', type=float, help='The reward for reaching the goal state')
    parser.add_argument('pit_fall_reward', type=float, help='The reward for falling into a pit')
    parser.add_argument('move_reward', type=float, help='The reward for moving')
    parser.add_argument('give_up_reward', type=float, help='The reward for giving up')
    parser.add_argument('number_of_trials', type=int, help='The number of learning trials to run')
    parser.add_argument('exploration_epsilon', type=float, help='The weight for exploration')

    args = vars(parser.parse_args())

    env = environment.Environment(
        args['goal_state_reward'],
        args['pit_fall_reward'],
        args['move_reward'],
        args['give_up_reward'])

    sarsa = SARSA.SARSA(
        env,
        args['number_of_trials'],
        args['exploration_epsilon']
    )

    return env, sarsa
Example #5
import EmptySARSA
import HORDQ
import RMax
import SARSA
if __name__ == "__main__":

    alpha = 0.2
    epsilon = 0.1
    gamma = 0.9
    #ob = (-1, -1, -1, (1, 2))
    #ob2 = (-1, -1, -1, (5, 4))
    #ob3 = (-1, -1, -1, (0, 0))
    ob4 = (-1, -1, -1, (1, 1))
    punishment = 10
    isRORDQ = False
    hordQ = HORDQ.HORDQ(alpha, epsilon, gamma, [1, -1], isRORDQ)
    probQ = SARSA.SARSA(alpha, epsilon, gamma, [1, -1])
    controller = RMax.RMax(epsilon, gamma, hordQ, probQ, punishment)

    #unit test for get room
    val = controller.getRoom(ob4)
    print "value: ", val
    assert( val == 0)
    #controller.start(ob)
    #for i in range(0, 1000):
        #
        #controller.step(1, ob)
        #controller.step(1, ob)
        #controller.step(1, ob)
        #controller.step(1, ob2)
    #controller.end(10)
Example #6
def BusRun(type, punishment, maxStep, isRORDQ, isRandomPlanner, isShow,
           frameRate, loadFile):
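    """Run the grid-world bus task with either a plain SARSA controller or an RMax controller (HORDQ + SARSA), then save the controller and per-episode rewards."""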
    discrete_size = 6
    objSet = (1, 1)
    monsterMoveProb = 0.3
    isEpisodeEnd = False
    #maxStep = 200000
    size = 800, 800
    gridSize = (discrete_size, discrete_size)
    delay = 100
    interval = 50
    pygame.init()
    pygame.key.set_repeat(delay, interval)
    clock = pygame.time.Clock()
    screen = pygame.display.set_mode(size)

    actionList = ((0, 1), (0, -1), (1, 0), (-1, 0))
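    # The four available actions are axis-aligned unit moves on the grid.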
    #controller = RelationalQ.RelationalQ(0.05, 0.1, 0.9, actionList)
    alpha = 0.2
    probAlpha = 0.1
    gamma = 1
    if type == 'SARSA':
        epsilon = 0.1
        controller = SARSA.SARSA(alpha, epsilon, gamma, actionList)

    else:
        epsilon = 0.05
        #isRORDQ = True
        hordQ = HORDQ.HORDQ(alpha, epsilon, gamma, actionList, isRORDQ)
        probQ = SARSA.SARSA(probAlpha, epsilon, gamma, [0])
        if isRandomPlanner:
            epsilon = 1
        controller = RMax.RMax(epsilon, gamma, hordQ, probQ, punishment)
    if loadFile != '':
        print "load:", loadFile
        controller = tool.Load(loadFile)
    env = BusEnv((discrete_size, discrete_size), size, actionList)

    numOfTurtle = objSet[0]
    numOfCoin = objSet[1]

    print "# coin ", numOfCoin
    print "# Turtle ", numOfTurtle
    print "isEpisodeEnd ", isEpisodeEnd

    isTraining = not isEpisodeEnd

    count = 0

    totalReward = 0
    rewardList = []
    stepCount = 0
    while stepCount < maxStep:
        #randomly choose a sub goal at the beginning of the episode
        goalDiff = actionList[int(random.random() * len(actionList))]
        world = env.start(numOfTurtle, numOfCoin)
        action = controller.start(env.getSarsaFeature())

        count += 1
        prevStepCount = stepCount
        episodeReward = 0
        while stepCount < maxStep:
            if stepCount % 1000 == 0:
                print "Time: ", stepCount / 1000
            stepCount = stepCount + 1
            clock.tick(frameRate)
            reward, flag = env.step(action)
            fea = env.getSarsaFeature()
            totalReward = totalReward + reward
            episodeReward = episodeReward + reward
            if flag:
                controller.end(reward)
                break
            action = controller.step(reward, fea)

            for event in pygame.event.get():
                #action = 0
                if event.type == pygame.QUIT: sys.exit()
            if isShow:
                screen.blit(env.getScreen(), (0, 0))
                pygame.display.flip()
        rewardList.append((prevStepCount, stepCount, episodeReward))
    print(totalReward)
    #for conf in controller.agent:
    #print controller.agent[conf].Q
    #controller.dumpObj()
    #controller.dumpCoinAndGoal()
    #controller.dumpCoinAndGoalEx(controller.prob)
    #controller.dumpCoinAndGoalEx(controller.realReward)
    tool.Save(controller, type)
    tool.Save(rewardList, 'reward_' + type)
Example #7
import matplotlib.pyplot as plt
import numpy as np

import SARSA


def moving_average(a, n=3):
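    """Return the n-point moving average of a, computed from a cumulative sum."""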
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n


S_rewards = SARSA.run(50000)
S_plot = moving_average(S_rewards, 5000)  # smooth plot with a moving average
plt.plot(S_plot)
plt.show()
Example #8
 def __init__(self, representation, policy, domain, logger, lspi_iterations=5,
              sample_window=100, epsilon=1e-3, re_iterations=100, initial_alpha=.1,
              lambda_=0, alpha_decay_mode='dabney', boyan_N0=1000):
     self.SARSA = SARSA(representation, policy, domain, logger, initial_alpha,
                        lambda_, alpha_decay_mode, boyan_N0)
     self.LSPI = LSPI(representation, policy, domain, logger, lspi_iterations,
                      sample_window, epsilon, re_iterations)
     super(LSPI_SARSA, self).__init__(representation, policy, domain, logger)