Example #1
def analyze_methods():
    i = 1
    n = 2  #number of grids

    LVL = "EASY"

    filename = "analyse_" + LVL

    while (i <= n):
        G = Game.random_generation(10 * i, 10 * i, LVL)

        if not G.is_winnable():
            continue

        ####--------------------- ITV
        # t1 = time.time()
        # policy = G.mdp.run_value_iteration(0.01)
        # t2 = time.time() - t1
        # nb_mouvement = G.play_with_policy(policy,False)
        #
        # f = open(filename+"_ITV","a")
        # f.write(str(nb_mouvement)+" "+str(t2)+" "+str(i*10)+" "+str(i*10)+"\n")
        # f.close()

        ####--------------------- LP
        # Game = new Game(".game")
        # t1 = time.time()
        # policy = G.mdp.run_linear_programming_resolution()
        # t2 = time.time() - t1
        # nb_mouvement = G.play_with_policy(policy,False)
        # f = open(filename+"_LP","a")
        # f.write(str(nb_mouvement)+" "+str(t2)+" "+str(i*10)+" "+str(i*10)+"\n")
        # f.close()

        ####--------------------- QL
        QL = QLearning(".game")

        t1 = time.time()
        policy = QL.run_Q_learning()
        t2 = time.time() - t1
        G = Game(".game")
        nb_mouvement = G.play_with_policy(policy, False)

        f = open(filename + "_QL", "a")
        f.write(
            str(nb_mouvement) + " " + str(t2) + " " + str(i * 10) + " " +
            str(i * 10) + "\n")
        f.close()

        i += 1
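A note on the uncommented QL block above: the same measurement can be written with a context manager, so the results file is closed even if play_with_policy raises, and with time.perf_counter(), which is better suited to interval timing than time.time(). A minimal sketch of the loop body under that assumption (same Game/QLearning interfaces as in the snippet):

        QL = QLearning(".game")
        t1 = time.perf_counter()
        policy = QL.run_Q_learning()
        elapsed = time.perf_counter() - t1
        nb_mouvement = Game(".game").play_with_policy(policy, False)

        # one line per grid: moves, seconds, width, height
        with open(filename + "_QL", "a") as f:
            f.write("{} {} {} {}\n".format(nb_mouvement, elapsed, i * 10, i * 10))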
Example #2
 def performAction(self, state):
     if self.lastNodeId:
         if self.isSuccess(self.nodes.getNode(self.lastStepId), state):
             self.nodes.getNode(self.lastNodeId).addSuccess()
             self.nodes.getNode(self.lastStepId).addSuccess()
     root = self.nodes.getNode(state)
     if root.isNew():
         root.children = self.setChildren(state, root)
         root.setScore(self.calculateScore(state))
         ql.run(root)
     nextState = self.getWinningPath(root.id)
     nextNode = self.nodes.getNode(nextState)
     self.lastStepId = nextNode.id
     self.updateProbabilities()
     self.nodes.saveNode(nextNode)
     self.nodes.saveNode(root)
Example #3
def get_alg(name, args, env):
    if name == "oracleq":
        alg_dict = {'horizon': args.horizon,
                    'alpha': args.lr,
                    'conf': args.conf }
        alg = OracleQ.OracleQ(env.action_space.n, params=alg_dict)
    elif name == 'decoding':
        alg_dict = {'horizon': env.horizon,
                    'model_type': args.model_type,
                    'n': args.n,
                    'num_cluster': args.num_cluster}
        alg = Decoding.Decoding(env.observation_space.n, env.action_space.n,
                                params=alg_dict)
    elif name == 'qlearning':
        assert args.tabular, "[EXPERIMENT] Must run QLearning in tabular mode"
        alg_dict = {
            'alpha': float(args.lr),
            'epsfrac': float(args.epsfrac),
            'num_episodes': int(args.episodes)}
        alg = QLearning.QLearning(env.action_space.n, params=alg_dict)
    elif name == 'linq':
        alg_dict = {
            'horizon': env.horizon,
            'conf': args.conf
            }
        alg = LinQ.LinQ(env.observation_space.n, env.action_space.n,
                        params=alg_dict)
    return (alg)
Example #4
    def training_one(discountRate = 0.98, actionProbabilityBase = 1.8, trainingIterations = 20000, mountainCarBinsPerDimension = 20, render = False, randomActionRate = 0.01, learningRateScale = 0.01, use_memory=False):
        qlearner = QLearning.QLearning(stateSpaceShape=Assignment7Support.MountainCarStateSpaceShape(mountainCarBinsPerDimension), numActions=env.action_space.n, discountRate=discountRate)

        for trialNumber in range(trainingIterations):
            observation = env.reset()
            reward = 0

            qlearner.clear_record()
            for i in range(200):

                currentState = Assignment7Support.MountainCarObservationToStateSpace(observation, mountainCarBinsPerDimension)
                action = qlearner.GetAction(currentState, learningMode=True, randomActionRate=randomActionRate, actionProbabilityBase=actionProbabilityBase)

                oldState = Assignment7Support.MountainCarObservationToStateSpace(observation, mountainCarBinsPerDimension)
                observation, reward, isDone, info = env.step(action)
                newState = Assignment7Support.MountainCarObservationToStateSpace(observation, mountainCarBinsPerDimension)

                # learning rate scale
                qlearner.ObserveAction(oldState, action, newState, reward, learningRateScale=learningRateScale)

                if use_memory:
                    qlearner.record(oldState, action, newState, reward)

                if isDone:
                    if use_memory:
                        qlearner.replay(learningRateScale)
                #     if (trialNumber + 1) % 1000 == 0:
                #         print(trialNumber + 1, i + 1, np.min(qlearner.q_table), np.mean(qlearner.q_table))
                    break

        n = 20
        totalRewards = []
        for runNumber in range(n):
            observation = env.reset()
            totalReward = 0
            reward = 0
            for i in range(200):
                if render:
                    renderDone = env.render()

                currentState = Assignment7Support.MountainCarObservationToStateSpace(observation, mountainCarBinsPerDimension)
                observation, reward, isDone, info = env.step(qlearner.GetAction(currentState, learningMode=False))

                totalReward += reward

                if isDone:
                    if render:
                        renderDone = env.render()
                    # print(runNumber + 1, i + 1, totalReward)
                    totalRewards.append(totalReward)
                    break

        if render:
            env.close()

        average_score = sum(totalRewards) / float(len(totalRewards))
        print(f'[{datetime.datetime.now()}] The average score of this one attempt is {average_score}')

        return average_score
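QLearning.QLearning itself does not appear in these excerpts. Below is a minimal sketch of the tabular learner that the GetAction/ObserveAction calls above appear to wrap: the constructor arguments and method names follow the calls in Example #4, while the plain epsilon-greedy selection (instead of the probability-base softmax the driver comments hint at) and the visit-count learning-rate decay are assumptions; the replay-memory methods (clear_record/record/replay) are omitted.

import random
import numpy as np


class TabularQLearning:
    """Hypothetical stand-in for the QLearning.QLearning class used above."""

    def __init__(self, stateSpaceShape, numActions, discountRate):
        self.discountRate = discountRate
        self.q_table = np.zeros(tuple(stateSpaceShape) + (numActions,))
        self.visits = np.zeros_like(self.q_table)

    def GetAction(self, state, learningMode=True, randomActionRate=0.01,
                  actionProbabilityBase=1.8):
        # actionProbabilityBase is accepted to match the calls above but unused here.
        # Assumption: plain epsilon-greedy exploration while learning.
        if learningMode and random.random() < randomActionRate:
            return random.randrange(self.q_table.shape[-1])
        return int(np.argmax(self.q_table[tuple(state)]))

    def ObserveAction(self, oldState, action, newState, reward, learningRateScale=0.01):
        sa = tuple(oldState) + (action,)
        self.visits[sa] += 1
        # Assumption: the learning rate decays with the visit count.
        alpha = 1.0 / (1.0 + learningRateScale * self.visits[sa])
        target = reward + self.discountRate * np.max(self.q_table[tuple(newState)])
        self.q_table[sa] += alpha * (target - self.q_table[sa])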
Example #5
def main():

    mdp = MarkovDecisionProblem.MarkovDecisionProblem()

    vi = ValueIteration.ValueIteration(mdp)
    ql = QLearning.QLearning(mdp)
    
    ql.qlearning(iterations=15, exploration=0.2)
Example #6
def test_cartpole():
    cartpole = QLearning.CartPole()
    policy_filename = "policy/policy_9600.npy"
    env = gym.make('CartPole-v0')

    env = env.unwrapped
    env = Monitor(env, "video", force=True)
    episodes = 1000
    step = 20000
    my_test(policy_filename, env, episodes, step, cartpole.discrete_util)
Example #7
def test_mountaincar():
    mountaincar = QLearning.MountainCar()
    policy_filename = "policy/policy_5300.npy"
    env = gym.make('MountainCar-v0')

    env = env.unwrapped
    #env = Monitor(env, "video", force=True)
    episodes = 1000
    step = 2000
    my_test(policy_filename, env, episodes, step, mountaincar.discrete_util)
Example #8
def test_acrobot():
    acrobot = QLearning.Acrobot()
    policy_filename = "policy/policy_9900.npy"
    env = gym.make('Acrobot-v1')

    env = env.unwrapped
    #env = Monitor(env, "video", force=True)
    episodes = 1000
    step = 2000
    my_test(policy_filename, env, episodes, step, acrobot.discrete_util)
Example #9
def test():
    '''Create the MDP, then run an episode of random actions for 10 steps.'''
    initial = Peg.Initial_state
    state = Peg.State(initial)
    learn = QLearning.Q(state)
    learn.register_R(Peg.R)
    learn.register_all_moves(Peg.all_possible_moves)
    learn.register_pegs(Peg.number_of_pegs)

    learn.QLearning(0.9, 100, 0.05)
    print(QValues_string(learn.Q))
Example #10
def get_alg(name, args, env):
    if name == "oracleq":
        alg_dict = {
            'horizon': args.horizon,
            'alpha': args.lr,
            'conf': args.conf
        }
        alg = OracleQ.OracleQ(env.action_space.n, params=alg_dict)
    elif name == "neural-e3":
        alg_dict = {
            'horizon': args.horizon,
            'dimension': args.dimension,
            'n_hidden': args.n_hidden,
            'n_ensemble': args.n_ensemble,
            'n_playouts': args.n_playouts,
            'n_samples': args.n_samples,
            'n_model_updates': args.n_model_updates,
            'ucb_c': args.ucb_c,
            'anchor': args.anchor,
            'lr': args.lr,
            'batch_size': 100,
            'n_actions': env.action_space.n,
            'conf': args.conf
        }
        alg = NeuralE3.NeuralE3(env.action_space.n, params=alg_dict)
    elif name == 'decoding':
        alg_dict = {
            'horizon': env.horizon,
            'model_type': args.model_type,
            'n': args.n,
            'num_cluster': args.num_cluster
        }
        alg = Decoding.Decoding(env.observation_space.n,
                                env.action_space.n,
                                params=alg_dict)
    elif name == 'uniform':
        alg_dict = {
            'horizon': env.horizon,
            'model_type': args.model_type,
            'n_actions': env.action_space.n,
            'n': args.n,
            'num_cluster': args.num_cluster
        }
        alg = Uniform.Uniform(env.action_space.n, params=alg_dict)
    elif name == 'qlearning':
        assert args.tabular, "[EXPERIMENT] Must run QLearning in tabular mode"
        alg_dict = {
            'alpha': float(args.lr),
            'epsfrac': float(args.epsfrac),
            'num_episodes': int(args.episodes)
        }
        alg = QLearning.QLearning(env.action_space.n, params=alg_dict)
    return (alg)
Example #11
def ql(args):
    if len(args) >= 2:
        model = parse_model(args[0])
        num_steps = int(args[1])

        if len(args) >= 3:
            record_file = open(args[2], 'w')
        else:
            record_file = None

        if len(args) >= 4:
            learning_rate = float(args[3])
        else:
            learning_rate = None

        if len(args) >= 5:
            discount_rate = float(args[4])
        else:
            discount_rate = None

        if len(args) >= 6:
            e = float(args[5])
        else:
            e = None

        if learning_rate != None and discount_rate != None and e != None:
            ql = QLearning(model, learning_rate, discount_rate, e)
        elif learning_rate != None and discount_rate != None:
            ql = QLearning(model, learning_rate, discount_rate)
        elif learning_rate != None:
            ql = QLearning(model, learning_rate)
        else:
            ql = QLearning(model)

        if record_file != None:
            run(ql, num_steps, record_file)
        else:
            run(ql, num_steps)
    else:
        invalid()
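The cascade of constructor and run calls above only ever passes a leading prefix of the optional arguments. Assuming QLearning and run take them positionally, as the calls suggest, the same behavior can be written more compactly (and with is None rather than != None); a sketch, not from the original:

        # Collect the leading optional arguments that were actually supplied.
        extras = []
        for value in (learning_rate, discount_rate, e):
            if value is None:
                break
            extras.append(value)
        ql = QLearning(model, *extras)

        if record_file is not None:
            run(ql, num_steps, record_file)
        else:
            run(ql, num_steps)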
Example #12
    def initializeQLearning(self, Q=None):
        myQLearning = QLearning.QLearning(self.MDP,
                                          self.Agent,
                                          self.alpha,
                                          self.gamma,
                                          self.epsilon,
                                          self.epsilonIncrement,
                                          1,
                                          self.H,
                                          Q=Q,
                                          gammaPRQL=self.gammaPRQL)

        return myQLearning
Example #13
    def test_1(self):
        qlearner = QLearning.QLearning([2, 2], 2, 0.9)

        print("action 1")
        qlearner.ObserveAction([0, 0], 1, [0, 1], 1, learningRateScale=1.0)
        print(qlearner.Q)
        print("visit")
        print(qlearner.visits)

        print("action 2")
        qlearner.ObserveAction([0, 0], 1, [0, 1], 1, learningRateScale=1.0)
        print(qlearner.Q)
        print("visit")
        print(qlearner.visits)
Example #14
def set_player(args, color, s):
    if args == 1:
        _players = P.Player()
    elif args == 2:
        _players = R.RandomAI()
    elif args == 3:
        p_color = color
        # Load previously trained data if it exists
        if os.path.exists('./data/{}_move_4x4.pickle'.format(s)):
            with open('./data/{}_move_4x4.pickle'.format(s), 'rb') as f:
                print('Loading... (this takes longer than you might expect)')
                _players = pickle.load(f)
                print('Done!!')
        else:
            _players = Q.QLearning(color)
    return _players
Example #15
 def __init__(self, n, ysize, xsize, erate, eps, gamma, alpha, maxEpisodes, maxSteps, touch, capture):
     """
     Initialize the pursuit problem.
     :param erate: probability that the prey fails to move
     """
     self.numOfAgents = n
     self.erate = erate
     self.dif = [[0,-1],[0,1],[-1,0],[1,0],[0,0]]
     self.ysize = ysize
     self.xsize = xsize
     self.touch = touch
     self.capture = capture
     dim = [self.ysize, self.xsize]
     for i in range(n):
         dim = dim + [self.ysize, self.xsize]
     self.ql = QLearning.QLearning(dim, pow(5,n), eps, gamma, alpha, maxEpisodes, maxSteps, self.inif, self.act, self.checkg, [touch, capture] )
     self.ql.alabel = self.mkActLabel(n)  # changed from a dict to an array
Example #16
    def __init__(self):
        super(CollectMineralsAndGas, self).__init__()

        data = ['SP Att.', 'SP Att.F', '#Depots', 'Ref. Att.', 'Ref. Att.F', '#Refineries', 'CC Att.', 'CC Att.f', 'Mins', 'Gas', 'IdleSCVs', 'CCs', 'Supply', 'Score']
        with open('/home/kenn/Development/sc2-bot/CustomAgents/scores.txt', 'w+') as f:
            f.write('{0[0]:<15}{0[1]:<15}{0[2]:<15}{0[3]:<15}{0[4]:<15}{0[5]:<15}{0[6]:<15}{0[7]:<15}{0[8]:<15}{0[9]:<15}{0[10]:<15}{0[11]:<15}{0[12]:<15}{0[13]:<15}\n'.format(data))

        self.qlearn = QLearning.QLearningTable(actions=list(range(len(smart_actions))))

        self.move_number = 0

        self.previous_action = None
        self.previous_state = None
        self.unit_type = None

        self.supply_depots = 0
        self.refineries = 0

        self.previous_collected_minerals_rate = 0
        self.previous_collected_vespene_rate = 0

        self.build_supply_depot_attempts = 0
        self.build_supply_depot_attempts_failed = 0

        self.build_cc_attempts = 0
        self.build_cc_attempt_failed = 0

        self.build_refinery_attempts = 0
        self.build_refinery_attempts_failed = 0

        self.builder_iterator = 0
        self.invocations = 0

        self.initializing = 0

        if os.path.isfile(DATA_FILE + '.gz'):
            self.qlearn.q_table = pd.read_pickle(DATA_FILE + '.gz', compression='gzip')
Example #17
def main(maxGames, gamma, epsilon, bird_has_learned, q_values_counter):
    """The application's entry point.

    If someone executes this module (instead of importing it, for
    example), this function is called.
    """
    counter = 0
    QL = QLearning.Qvalue(gamma)

    if bird_has_learned==1 :
        QL.Q = q_values_counter

    reward = 10
    reward_die = -1000
    reward_pass = 1
    reward_ingap = 200
    scoreList = []
    avgScore = []

    filename_prefix = './q-attempt-auto-'
    filename = filename_prefix + str(gamma) + '-' + str(epsilon) + '.txt'
    f = open(filename, 'w+')

    pygame.init()

    while counter < maxGames:

        episode = []

        display_surface = pygame.display.set_mode((WIN_WIDTH, WIN_HEIGHT))
        pygame.display.set_caption('Pygame Flappy Bird')

        clock = pygame.time.Clock()
        score_font = pygame.font.SysFont(None, 32, bold=True)  # default font
        images = load_images()

        # the bird stays in the same x position, so bird.x is a constant
        # center bird on screen
        bird = Bird(50, int(WIN_HEIGHT/2 - Bird.HEIGHT/2), 2,
                    (images['bird-wingup'], images['bird-wingdown']))

        pipes = deque()

        nextPipes = deque()
        agent_y = None
        agent_status = True
        time_taken = []
        ActionList = []
        lastPipes = 0
        fcounter = 0


        frame_clock = 0  # this counter is only incremented if the game isn't paused
        score = 0
        done = paused = False
        while not done:
            clock.tick(FPS)

            # Handle this 'manually'.  If we used pygame.time.set_timer(),
            # pipe addition would be messed up when paused.
            if not (paused or frame_clock % msec_to_frames(PipePair.ADD_INTERVAL)):
                pp = PipePair(images['pipe-end'], images['pipe-body'])
                pipes.append(pp)
                nextPipes.append(pp)

            for e in pygame.event.get():
                if e.type == QUIT or (e.type == KEYUP and e.key == K_ESCAPE):
                    done = True
                    break
                elif e.type == KEYUP and e.key in (K_PAUSE, K_p):
                    paused = not paused
                elif e.type == MOUSEBUTTONUP or (e.type == KEYUP and
                        e.key in (K_UP, K_RETURN, K_SPACE)):
                    bird.msec_to_climb = Bird.CLIMB_DURATION


            ###############################  RL CODE ####################################################


            ######################################################################################################
            ####### QLearning
            ######################################################################################################
            if (fcounter%(FPS/4) == 0):
                newState = QLearning.QLState(bird,pipes)
                if bird_has_learned==1:
                    newAction = QLearning.epsilon_greedy(QL, 0.0, newState)
                else:
                    newAction = QLearning.epsilon_greedy(QL, min(0.1, epsilon/float(counter+1)), newState)
                if newAction == 'jump':
                    bird.msec_to_climb = Bird.CLIMB_DURATION
                episode.append((newState.short(),newAction))
            fcounter+=1

            if paused:
                continue  # don't draw anything

            # check for collisions
            pipe_collision = any(p.collides_with(bird) for p in pipes)
            if pipe_collision or 0 >= bird.y or bird.y >= WIN_HEIGHT - Bird.HEIGHT:
                done = True

            for x in (0, WIN_WIDTH / 2):
                display_surface.blit(images['background'], (x, 0))

            ############################## display predicted path ###################

            # for state in predState:
            #     display_surface.blit(state.bird.image,state.bird.rect)
            # predState.pop(0)
            ##########################################################################
            while pipes and not pipes[0].visible:
                pipes.popleft()

            for p in pipes:
                p.update()
                display_surface.blit(p.image, p.rect)

            bird.update()
            display_surface.blit(bird.image, bird.rect)

            # update and display score
            for p in pipes:
                if p.x + PipePair.WIDTH < bird.x and not p.score_counted:
                    score += 1
                    p.score_counted = True
                    nextPipes.popleft()

            score_surface = score_font.render(str(score), True, (255, 255, 255))
            score_x = WIN_WIDTH/2 - score_surface.get_width()/2
            display_surface.blit(score_surface, (score_x, PipePair.PIECE_HEIGHT))

            pygame.display.flip()
            frame_clock += 1

        if bird_has_learned != 1:            
            for i in range(len(episode)-2):
                if episode[i+1][0][1] >= 0 and episode[i+1][0][1] <= 3:
                    QL.update(episode[i][0],episode[i][1],reward_ingap,episode[i+1][0],counter)
                else:
                    QL.update(episode[i][0],episode[i][1],reward,episode[i+1][0],counter)

            QL.update(episode[len(episode)-2][0],episode[len(episode)-2][1],reward_die,episode[len(episode)-1][0],counter)
        print('Game over! Score: %i\tnum states:%i\tnum games:%i' % (score, len(QL.Q), counter))#        print(QL.Q)
        counter+=1
        if len(avgScore) == 0:
            avgScore.append(score)
        else:
            avgScore.append((avgScore[-1]*(counter-1)+ score)/float(counter))
        scoreList.append(score)


    pygame.quit()
    print(scoreList)
    print(avgScore)
    f.write(str(avgScore))
    f.write('\n')
    f.write(str(scoreList))
    f.write('\n')
    f.write(str(QL.Q))
    f.write('\n')
Example #18
def main(maxGames, gamma, epsilon, bird_has_learned, q_values_counter):
    """The application's entry point.

    If someone executes this module (instead of importing it, for
    example), this function is called.
    """
    counter = 0
    QL = QLearning.Qvalue(gamma)

    if bird_has_learned == 1:
        QL.Q = q_values_counter

    reward = 10
    reward_die = -1000
    reward_pass = 1
    reward_ingap = 200
    scoreList = []
    avgScore = []

    filename_prefix = './q-attempt-auto-'
    filename = filename_prefix + str(gamma) + '-' + str(epsilon) + '.txt'
    f = open(filename, 'w+')

    pygame.init()

    while counter < maxGames:

        episode = []

        display_surface = pygame.display.set_mode((WIN_WIDTH, WIN_HEIGHT))
        pygame.display.set_caption('Pygame Flappy Bird')

        clock = pygame.time.Clock()
        score_font = pygame.font.SysFont(None, 32, bold=True)  # default font
        images = load_images()

        # the bird stays in the same x position, so bird.x is a constant
        # center bird on screen
        bird = Bird(50, int(WIN_HEIGHT / 2 - Bird.HEIGHT / 2), 2,
                    (images['bird-wingup'], images['bird-wingdown']))

        pipes = deque()

        nextPipes = deque()
        agent_y = None
        agent_status = True
        time_taken = []
        ActionList = []
        lastPipes = 0
        fcounter = 0

        frame_clock = 0  # this counter is only incremented if the game isn't paused
        score = 0
        done = paused = False
        while not done:
            clock.tick(FPS)

            # Handle this 'manually'.  If we used pygame.time.set_timer(),
            # pipe addition would be messed up when paused.
            if not (paused
                    or frame_clock % msec_to_frames(PipePair.ADD_INTERVAL)):
                pp = PipePair(images['pipe-end'], images['pipe-body'])
                pipes.append(pp)
                nextPipes.append(pp)

            for e in pygame.event.get():
                if e.type == QUIT or (e.type == KEYUP and e.key == K_ESCAPE):
                    done = True
                    break
                elif e.type == KEYUP and e.key in (K_PAUSE, K_p):
                    paused = not paused
                elif e.type == MOUSEBUTTONUP or (e.type == KEYUP and e.key
                                                 in (K_UP, K_RETURN, K_SPACE)):
                    bird.msec_to_climb = Bird.CLIMB_DURATION

            ###############################  RL CODE ####################################################

            ######################################################################################################
            ####### QLearning
            ######################################################################################################
            if (fcounter % (FPS / 4) == 0):
                newState = QLearning.QLState(bird, pipes)
                if bird_has_learned == 1:
                    newAction = QLearning.epsilon_greedy(QL, 0.0, newState)
                else:
                    newAction = QLearning.epsilon_greedy(
                        QL, min(0.1, epsilon / float(counter + 1)), newState)
                if newAction == 'jump':
                    bird.msec_to_climb = Bird.CLIMB_DURATION
                episode.append((newState.short(), newAction))
            fcounter += 1

            if paused:
                continue  # don't draw anything

            # check for collisions
            pipe_collision = any(p.collides_with(bird) for p in pipes)
            if pipe_collision or 0 >= bird.y or bird.y >= WIN_HEIGHT - Bird.HEIGHT:
                done = True

            for x in (0, WIN_WIDTH / 2):
                display_surface.blit(images['background'], (x, 0))

            ############################## display predicted path ###################

            # for state in predState:
            #     display_surface.blit(state.bird.image,state.bird.rect)
            # predState.pop(0)
            ##########################################################################
            while pipes and not pipes[0].visible:
                pipes.popleft()

            for p in pipes:
                p.update()
                display_surface.blit(p.image, p.rect)

            bird.update()
            display_surface.blit(bird.image, bird.rect)

            # update and display score
            for p in pipes:
                if p.x + PipePair.WIDTH < bird.x and not p.score_counted:
                    score += 1
                    p.score_counted = True
                    nextPipes.popleft()

            score_surface = score_font.render(str(score), True,
                                              (255, 255, 255))
            score_x = WIN_WIDTH / 2 - score_surface.get_width() / 2
            display_surface.blit(score_surface,
                                 (score_x, PipePair.PIECE_HEIGHT))

            pygame.display.flip()
            frame_clock += 1

        if bird_has_learned != 1:
            for i in range(len(episode) - 2):
                if episode[i + 1][0][1] >= 0 and episode[i + 1][0][1] <= 3:
                    QL.update(episode[i][0], episode[i][1], reward_ingap,
                              episode[i + 1][0], counter)
                else:
                    QL.update(episode[i][0], episode[i][1], reward,
                              episode[i + 1][0], counter)

            QL.update(episode[len(episode) - 2][0],
                      episode[len(episode) - 2][1], reward_die,
                      episode[len(episode) - 1][0], counter)
        print('Game over! Score: %i\tnum states:%i\tnum games:%i' %
              (score, len(QL.Q), counter))  #        print(QL.Q)
        counter += 1
        if len(avgScore) == 0:
            avgScore.append(score)
        else:
            avgScore.append(
                (avgScore[-1] * (counter - 1) + score) / float(counter))
        scoreList.append(score)

    pygame.quit()
    print(scoreList)
    print(avgScore)
    f.write(str(avgScore))
    f.write('\n')
    f.write(str(scoreList))
    f.write('\n')
    f.write(str(QL.Q))
    f.write('\n')
Example #19
def get_learner(algorithm, model):
    if algorithm == "QLearning":
        if model.name == "SlipperyChain":
            return QLearning(model)
        elif model.name == "Loop":
            return QLearning(model)
        elif model.name == "LoopDeadEnd":
            return QLearning(model,0.2,0.99,0.001,0.999)
        elif model.name == "LoopDiffTrans":
            return QLearning(model,0.3,0.2,0.2,0.999)
        else:
            return QLearning(model)
            
    elif algorithm == "PrioritizedSweeping":
        if model.name == "SlipperyChain":
            return PrioritizedSweeping(model,2,0.2,0.99,0.9)
        elif model.name == "Loop":
            return PrioritizedSweeping(model,2,0.999,0.99,0.9)
        elif model.name == "LoopDeadEnd":
            return PrioritizedSweeping(model,5,0.999,0.99,0.9)
        elif model.name == "LoopDiffTrans":
            return PrioritizedSweeping(model,5,0.8,0.99,0.9)
        else:
            return PrioritizedSweeping(model)
        
        
    elif algorithm == "PrioritizedSweepingPolicy":
        return PrioritizedSweepingPolicy(model)
        
    elif algorithm == "PrioritizedSweepingHeuristics":
        if model.name == "SlipperyChain":
            return PrioritizedSweepingHeuristics(model,2,0.9,0.99,0.9)
        elif model.name == "Loop":
            return PrioritizedSweepingHeuristics(model,2,0.9,0.99,0.9)
        elif model.name == "LoopDeadEnd":
            return PrioritizedSweepingHeuristics(model,1,0.999,0.999,0.999)
        elif model.name == "LoopDiffTrans":
            return PrioritizedSweepingHeuristics(model,5,0.9,0.99,0.9)
        else:
            return PrioritizedSweepingHeuristics(model)
        
    elif algorithm == "QLearn":
        return QLearn(model)
        
    elif algorithm == "PrioritizedQLearning":
        return PrioritizedQLearning(model)
        
    elif algorithm == "BayesDP":
        if model.name == "SlipperyChain":
            return BayesPrioritizedSweeping(model,10,0.9,1,0.2,20)
        elif model.name == "Loop":
            return BayesPrioritizedSweeping(model,10,0.9,1,0.2,20)
        elif model.name == "LoopDeadEnd":
            return BayesPrioritizedSweeping(model)
        elif model.name == "LoopDiffTrans":
            return BayesPrioritizedSweeping(model,1,0.2,1,0.2,20)
        else:
            return BayesPrioritizedSweeping(model)
            
    else:
        raise Exception(algorithm + " not found")
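The per-model tuning in Example #19 can also be expressed as lookup tables of constructor arguments. A sketch covering only the QLearning and PrioritizedSweeping branches, reusing the same numbers as above; this is an illustrative restructuring, not part of the original code:

# Hypothetical restructuring: default hyperparameters per model name.
QLEARNING_ARGS = {
    "LoopDeadEnd": (0.2, 0.99, 0.001, 0.999),
    "LoopDiffTrans": (0.3, 0.2, 0.2, 0.999),
}
SWEEPING_ARGS = {
    "SlipperyChain": (2, 0.2, 0.99, 0.9),
    "Loop": (2, 0.999, 0.99, 0.9),
    "LoopDeadEnd": (5, 0.999, 0.99, 0.9),
    "LoopDiffTrans": (5, 0.8, 0.99, 0.9),
}


def get_learner_table_driven(algorithm, model):
    if algorithm == "QLearning":
        return QLearning(model, *QLEARNING_ARGS.get(model.name, ()))
    if algorithm == "PrioritizedSweeping":
        return PrioritizedSweeping(model, *SWEEPING_ARGS.get(model.name, ()))
    raise Exception(algorithm + " not found")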
Example #20
def main():
    """The application's entry point.

    If someone executes this module (instead of importing it, for
    example), this function is called.
    """
    counter = 0 
    maxGames = 500
    QL = QLearning.Qvalue()
    # QL.Q = Counter({((7, 1), 'jump'): 7.857272463519327, ((7, 1), 'stay'): 7.741245285312653, ((9, 1), 'jump'): 6.974197946538579, ((9, 3), 'stay'): 2.4728502681599998, ((4, -2), 'jump'): 1.6991999999999998, ((7, 4), 'jump'): 1.656, ((4, -2), 'stay'): 1.6416, ((9, -2), 'jump'): 1.1663999999999999, ((4, 6), 'jump'): 0.96, ((2, -2), 'stay'): 0.9359999999999999, ((4, 6), 'stay'): 0.9359999999999999, ((2, 7), 'jump'): 0.6, ((7, -2), 'jump'): 0.6, ((5, 4), 'stay'): 0.6, ((9, -1), 'stay'): -80.54968253005099, ((9, 0), 'jump'): -97.33756762163051, ((5, 1), 'jump'): -99.28331164679123, ((-1, 2), 'jump'): -100.7318714988479, ((9, 0), 'stay'): -105.70532659645818, ((5, -1), 'stay'): -116.91400116117696, ((0, 1), 'stay'): -120.35252404640816, ((7, 0), 'stay'): -130.22648878053943, ((5, 1), 'stay'): -133.71809218288863, ((5, 0), 'jump'): -148.9099124363754, ((-3, 1), 'stay'): -154.4543708087079, ((7, 0), 'jump'): -156.57084541134142, ((5, 0), 'stay'): -162.0850310531728, ((4, 1), 'jump'): -171.2932661473332, ((4, 0), 'jump'): -179.7965693246642, ((-1, 2), 'stay'): -182.8699917288755, ((2, 1), 'jump'): -186.62179477555162, ((0, 0), 'jump'): -192.10570383523043, ((4, 0), 'stay'): -193.63696553046788, ((7, -2), 'stay'): -199.4327808, ((-1, 0), 'stay'): -202.9226452728635, ((-3, 0), 'jump'): -209.71099375511983, ((9, 2), 'jump'): -211.6669386541629, ((-1, 1), 'stay'): -216.87973668175724, ((0, 0), 'stay'): -217.877507717578, ((2, 0), 'stay'): -222.32663278238968, ((7, 2), 'stay'): -246.34051949020602, ((2, 1), 'stay'): -246.74031192502184, ((1, 0), 'jump'): -257.4947922630647, ((-1, 1), 'jump'): -257.66471443706297, ((1, 1), 'stay'): -262.18436199797634, ((1, 1), 'jump'): -268.6598699141706, ((-3, 1), 'jump'): -269.1352608305162, ((5, -2), 'stay'): -274.63086373166846, ((0, 1), 'jump'): -282.6151560499842, ((2, 0), 'jump'): -284.34034051851995, ((4, 5), 'jump'): -300.6624, ((9, 2), 'stay'): -345.8247595834438, ((9, -1), 'jump'): -348.47208661575024, ((5, 4), 'jump'): -356.20995840000006, ((2, 6), 'stay'): -359.15999999999997, ((2, 6), 'jump'): -359.4, ((9, 1), 'stay'): -359.8062183772605, ((4, -1), 'stay'): -400.5373431989693, ((7, -1), 'stay'): -415.27671308280446, ((9, 3), 'jump'): -466.85968987967163, ((9, -2), 'stay'): -481.6451347036986, ((-1, 0), 'jump'): -505.0351877771873, ((2, -2), 'jump'): -560.6256, ((4, 5), 'stay'): -576.9926399999999, ((1, 6), 'jump'): -600.0, ((1, 7), 'stay'): -600.0, ((1, -2), 'jump'): -600.0, ((7, -1), 'jump'): -603.1319891393689, ((-3, 0), 'stay'): -638.8181874316656, ((-3, 2), 'stay'): -665.6193584360807, ((4, 1), 'stay'): -706.3801965210189, ((-3, 2), 'jump'): -711.8758049693784, ((5, -1), 'jump'): -736.2986304823946, ((4, 4), 'stay'): -800.0267886182401, ((2, -1), 'stay'): -804.4422906834702, ((1, 0), 'stay'): -838.2476058673772, ((1, -3), 'stay'): -840.0, ((4, -1), 'jump'): -852.0242460274835, ((7, 4), 'stay'): -860.9277966754564, ((7, 3), 'stay'): -865.0574039234211, ((4, 4), 'jump'): -865.9002218495999, ((7, 3), 'jump'): -878.6417536488161, ((2, 5), 'jump'): -888.9302400000001, ((2, 5), 'stay'): -888.9456, ((4, 3), 'stay'): -934.4293978218991, ((1, 6), 'stay'): -936.0, ((2, 4), 'stay'): -956.7407422463999, ((5, 3), 'jump'): -972.1059646694266, ((1, -2), 'stay'): -974.4000000000001, ((1, 5), 'jump'): -974.4000000000001, ((7, 2), 'jump'): -984.7275651949986, ((5, 2), 'jump'): -988.945812073332, ((5, 3), 'stay'): -992.5832848500062, ((5, 2), 'stay'): -995.5538804111354, ((1, 5), 'stay'): -995.904, ((2, -1), 'jump'): -997.512428804205, ((4, 2), 'jump'): 
    # -997.6603770175502, ((4, 2), 'stay'): -997.9212840472369, ((4, 3), 'jump'): -997.9374335860218, ((2, 4), 'jump'): -998.88964786176, ((2, 2), 'stay'): -998.9614614721633, ((2, 3), 'jump'): -998.9974237927165, ((2, 2), 'jump'): -998.9982333417834, ((2, 3), 'stay'): -998.9994806521587, ((1, -1), 'jump'): -999.95805696, ((1, 4), 'stay'): -999.9932891136, ((1, 4), 'jump'): -999.998926258176, ((1, 2), 'stay'): -999.9997408910938, ((1, 3), 'jump'): -999.9999725122093, ((1, 2), 'jump'): -999.9999992963126, ((1, 3), 'stay'): -999.999999718525, ((1, -1), 'stay'): -1049.2908903268349})
    
    reward = 10
    reward_die = -1000
    reward_pass = 10
    scoreList = []
    avgScore = []

    pygame.init()

    while counter < maxGames:

        episode = []

        display_surface = pygame.display.set_mode((WIN_WIDTH, WIN_HEIGHT))
        pygame.display.set_caption('Pygame Flappy Bird')

        clock = pygame.time.Clock()
        score_font = pygame.font.SysFont(None, 32, bold=True)  # default font
        images = load_images()

        # the bird stays in the same x position, so bird.x is a constant
        # center bird on screen
        bird = Bird(50, int(WIN_HEIGHT/2 - Bird.HEIGHT/2), 2,
                    (images['bird-wingup'], images['bird-wingdown']))

        pipes = deque()

        nextPipes = deque()
        agent_y = None
        agent_status = True
        time_taken = []
        ActionList = []
        lastPipes = 0 
        fcounter = 0


        frame_clock = 0  # this counter is only incremented if the game isn't paused
        score = 0
        done = paused = False
        while not done:
            clock.tick(FPS)

            # Handle this 'manually'.  If we used pygame.time.set_timer(),
            # pipe addition would be messed up when paused.
            if not (paused or frame_clock % msec_to_frames(PipePair.ADD_INTERVAL)):
                pp = PipePair(images['pipe-end'], images['pipe-body'])
                pipes.append(pp)
                nextPipes.append(pp)

            for e in pygame.event.get():
                if e.type == QUIT or (e.type == KEYUP and e.key == K_ESCAPE):
                    done = True
                    break
                elif e.type == KEYUP and e.key in (K_PAUSE, K_p):
                    paused = not paused
                elif e.type == MOUSEBUTTONUP or (e.type == KEYUP and
                        e.key in (K_UP, K_RETURN, K_SPACE)):
                    bird.msec_to_climb = Bird.CLIMB_DURATION


            ###############################  RL CODE ####################################################

            
            ######################################################################################################
            ####### QLearning
            ######################################################################################################
            
            if (fcounter%(FPS/8) == 0):
                newState = QLearning.QLState(bird,pipes)
                if counter%10 == 0:
                    newAction = QLearning.epsilon_greedy(QL,0,newState)
                else:
                    newAction = QLearning.epsilon_greedy(QL,min(0.6,10/math.sqrt(counter+1)),newState)
                if newAction == 'jump':
                    bird.msec_to_climb = Bird.CLIMB_DURATION
                episode.append((newState.short(),newAction))
            fcounter+=1

            if paused:
                continue  # don't draw anything

            # check for collisions
            pipe_collision = any(p.collides_with(bird) for p in pipes)
            if pipe_collision or 0 >= bird.y or bird.y >= WIN_HEIGHT - Bird.HEIGHT:
                done = True

            for x in (0, WIN_WIDTH / 2):
                display_surface.blit(images['background'], (x, 0))

            ############################## display predicted path ###################
            
            # for state in predState:
            #     display_surface.blit(state.bird.image,state.bird.rect)
            # predState.pop(0)
            ##########################################################################
            while pipes and not pipes[0].visible:
                pipes.popleft()

            for p in pipes:
                p.update()
                display_surface.blit(p.image, p.rect)

            bird.update()
            display_surface.blit(bird.image, bird.rect)

            # update and display score
            for p in pipes:
                if p.x + PipePair.WIDTH < bird.x and not p.score_counted:
                    score += 1
                    p.score_counted = True
                    nextPipes.popleft()

            score_surface = score_font.render(str(score), True, (255, 255, 255))
            score_x = WIN_WIDTH/2 - score_surface.get_width()/2
            display_surface.blit(score_surface, (score_x, PipePair.PIECE_HEIGHT))

            pygame.display.flip()
            frame_clock += 1

        for i in range(len(episode)-2):
            if episode[i+1][0][1] >= 0 and episode[i+1][0][1] <= 3:
                QL.update(episode[i][0],episode[i][1],reward_pass,episode[i+1][0],counter)
            else:
                QL.update(episode[i][0],episode[i][1],reward,episode[i+1][0],counter)
        QL.update(episode[len(episode)-2][0],episode[len(episode)-2][1],reward_die,episode[len(episode)-1][0],counter)
        print('Game over! Score: %i' % score)
#        print(QL.Q)
        counter+=1

        print(counter)
        if (counter-1) == 0:
            avgScore.append(score)
        elif((counter-1)%10 == 0):
            avgScore.append(avgScore[-1]*(counter-1)/counter + score/counter)
        if (counter-1)%10==0:
            scoreList.append(score)
Example #21
    def training_one(runs_index):
        qlearner = QLearning.QLearning(
            stateSpaceShape=Assignment7Support.CartPoleStateSpaceShape(),
            numActions=env.action_space.n,
            discountRate=discountRate)

        print(
            f'[{datetime.datetime.now()}] Start training, runs id {runs_index + 1}'
        )
        for trialNumber in range(trainingIterations):
            observation = env.reset()
            reward = 0
            for i in range(300):
                #env.render()

                currentState = Assignment7Support.CartPoleObservationToStateSpace(
                    observation)
                action = qlearner.GetAction(
                    currentState,
                    learningMode=True,
                    randomActionRate=randomActionRate,
                    actionProbabilityBase=actionProbabilityBase)

                oldState = Assignment7Support.CartPoleObservationToStateSpace(
                    observation)
                observation, reward, isDone, info = env.step(action)
                newState = Assignment7Support.CartPoleObservationToStateSpace(
                    observation)

                qlearner.ObserveAction(oldState,
                                       action,
                                       newState,
                                       reward,
                                       learningRateScale=learningRateScale)

                if isDone:
                    # if (trialNumber + 1) % 1000 == 0:
                    #     print(trialNumber + 1, i + 1, np.max(qlearner.q_table), np.mean(qlearner.q_table))
                    break
        print(
            f'[{datetime.datetime.now()}] End of the training, runs id {runs_index + 1}'
        )

        ## Now do the best n runs I can
        # input('Enter to continue...')

        n = 20
        totalRewards = []
        for runNumber in range(n):
            observation = env.reset()
            totalReward = 0
            reward = 0
            for i in range(300):
                # renderDone = env.render()

                currentState = Assignment7Support.CartPoleObservationToStateSpace(
                    observation)
                observation, reward, isDone, info = env.step(
                    qlearner.GetAction(currentState, learningMode=False))

                totalReward += reward

                if isDone:
                    # renderDone = env.render()
                    # print(runNumber + 1, i + 1, totalReward)
                    totalRewards.append(totalReward)
                    break

        # env.close()

        average_score = sum(totalRewards) / float(len(totalRewards))
        print(
            f'[{datetime.datetime.now()}] End of the Test, runs id {runs_index + 1}'
        )
        print(f'runs id {runs_index + 1}, {totalRewards}')
        print(f'Your Score: {average_score}, runs id {runs_index + 1}')
        return average_score
Example #22
    if opt == '-h':
        usage()
        sys.exit()
    elif opt in ("-l"):
        print "-l seen"
        levelfile = arg
    elif opt in ("-k"):
        k = int(arg)
    elif opt in ("-a"):
        a = arg
    elif opt in ("-y"):
        y = arg
    elif opt in ("-m"):
        m = arg
    elif opt in ("-t"):
        t = arg
    elif opt in ("-x"):
        x = arg

try:
    flatland = Flatland.Flatland(0, levelfile)
except:
    print "problem loading level file"
    usage()
    sys.exit(2)

q = QLearning.QLearning(k, levelfile, a, y, m, t, x)
q.learn()
q.run(False, [1.0], True)
while 1:
    q.run(True, [1.0], True)
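The opt/arg pairs consumed at the top of Example #22 presumably come from a getopt loop that the excerpt cuts off. A minimal sketch of that missing header, with the flag string inferred from the branches above; every detail here is an assumption:

import getopt
import sys

try:
    # -h takes no value; the other flags each expect an argument.
    opts, remaining = getopt.getopt(sys.argv[1:], "hl:k:a:y:m:t:x:")
except getopt.GetoptError:
    usage()
    sys.exit(2)

for opt, arg in opts:
    # the if/elif chain from the excerpt above handles each flag here
    pass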
Example #23
from QLearning import *
from ChainModel import *

ps = QLearning(SlipperyChainModel(0.1), 0.5, 0.8, 0)
for i in range(10000):
    print ps.next()
# expect state 5 to have the highest potential
# for state in ps.model.states:
# 	print ps.get_v(state)

# for i in range(1, 6):
# 	print "transition model"
# 	print ps.get_transition_table(ps.model.state[i], ps.model.act_a)
# 	print ps.get_transition_table(ps.model.state[i], ps.model.act_b)
# 	print "reward model"
# 	print ps.get_reward_table(ps.model.state[i], ps.model.act_a)
# 	print ps.get_reward_table(ps.model.state[i], ps.model.act_b)
Example #24
    createTransitionProbabilityDict = Transition.CreateTransitionProbabilityDict(
        transitionFunction)
    transitionFromStateAndAction = Transition.TransitionFromStateAndAction(
        worldRange)
    transitionProbabilityDict = createTransitionProbabilityDict(
        stateList, actionList)
    createRewardDict = Reward.MultiTargetsRewardDict(stateList, actionList,
                                                     targetReward)
    runValueIteration = ValueIteration.ValueIteration(stateList, actionList,
                                                      decayRate,
                                                      convergeThreshold,
                                                      maxIterationStep)
    createPolicyFromValue = ValueIteration.PolicyFromValue(
        stateList, actionList, decayRate)
    runQLearning = QLearning.QLearning(alpha, gamma, epsilon,
                                       segmentTotalNumber, stateList,
                                       actionList,
                                       transitionFromStateAndAction)

    print('finish setting function', time.time() - time0)
    trainWolfPolicy = TrainWolfPolicyValueIteration(stateList,
                                                    transitionProbabilityDict,
                                                    createRewardDict,
                                                    runValueIteration,
                                                    createPolicyFromValue)
    # trainWolfPolicy = TrainWolfPolicyQLearning(stateList, createRewardDict, runQLearning)
    wolfPolicy = trainWolfPolicy()
    # print(wolfPolicy)
    print('finish training policy', time.time() - time0)

    print('begin saving policy, please wait')
    Writer.savePolicyToPkl(wolfPolicy, savePolicyFilename)
Example #25
from time import sleep

import Tictactoe
import Tablero
import Jugador
import QLearning

ai = QLearning.QLearning()
ai2 = QLearning.QLearning()

jugador1 = Jugador.Jugador("player1", 'X', ai)
jugador2 = Jugador.Jugador("player2", 'O', None)

jugadores = []
jugadores.append(jugador1)
jugadores.append(jugador2)

tablero = Tablero.Tablero()
game = Tictactoe.tictactoe(tablero, jugadores)

game.encender()

#while True:
#    game.new_game()
#    if game.iterations == 3:
#        game.new_game(t="q")
#        break
Example #26
states_377 = np.concatenate(
    (states_377_train, states_377_test),
    axis = 0)
next_states_377 = np.concatenate(
    (next_states_377_train, next_states_377_test),
    axis = 0)
actions_377 = np.concatenate(
    (actions_377_train, actions_377_test),
    axis = 0)
rewards_377 = np.concatenate(
    (rewards_377_train, rewards_377_test),
    axis = 0)

trajectories_377 = trajectories_377_train + trajectories_377_test

print '-------------------Evaluation for Q377--------------------------------'
# evaluation for q377
state_rewards_377 = ql.estimate_rewards(next_states_377_train, actions_377_train, rewards_377_train, action_q377)
discounted_rewards_377 = ql.discount_rewards(state_rewards_377, discount)
discounted_max_states_377 = ql.get_max_reward_states(discounted_rewards_377)
max_states_377 = ql.get_max_reward_states(state_rewards_377)
q377_policy = QLearnedPolicy(discounted_max_states_377[0], q377_labels)

print 'Reward of max state = {a}, discounted max state = {b}'.format(
    a = state_rewards_377[max_states_377[0]], b = state_rewards_377[discounted_max_states_377[0]])
print 'Discounted reward of max state = {a}, discounted max state = {b}'.format(
    a = discounted_rewards_377[max_states_377[0]], b = discounted_rewards_377[discounted_max_states_377[0]])
print 'Max state actions: {a} \nDiscounted max state actions: {b}'.format(
    a = q377_labels[np.array(max_states_377[0]).astype(int) == 1], b = q377_labels[np.array(discounted_max_states_377[0]).astype(int) == 1])

action_counts_377 = defaultdict(lambda: defaultdict(int))
for s,a in zip(states_377,actions_377):
    action_counts_377[tuple(s)][a] += 1
Example #27
            return unmapper([cord[0] - 1, cord[1]], dim)

        elif action == 2:

            return unmapper([cord[0], cord[1] - 1], dim)

        else:

            return unmapper([cord[0] + 1, cord[1]], dim)


R0 = np.zeros([36, 4])
R0[32, 0] = 100

learning = QLearning(range(0, 36), range(0, 4), R0, 0.5, 1)

state = 23

for i in range(0, 10000):

    action = learning.choose_action(state, 1 - float(i) / 100)
    next_state = apply_model(state, action)
    learning.update_model(state, action, next_state)
    if mod(i, 100) == 0:
        state = 23
    else:
        state = next_state

    plt.clf()
gw.DrawMap(np.reshape(np.max(learning.Q, 1), [6, 6]), model)
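One detail worth noting in Example #27: the exploration term 1 - float(i) / 100 goes negative after 100 iterations, so if choose_action treats it as an exploration probability, the remainder of the run is effectively greedy. A clamped schedule is a common alternative; a sketch (the decay constants are assumptions, not from the original):

# Hypothetical schedule: linear decay over the first 2000 steps, then held at 5%.
def epsilon_schedule(step, start=1.0, end=0.05, decay_steps=2000):
    frac = min(step / float(decay_steps), 1.0)
    return start + frac * (end - start)

# usage inside the loop above:
#     action = learning.choose_action(state, epsilon_schedule(i))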
Example #28
            return unmapper([cord[0] - 1, cord[1]], dim)

        elif action == 2:

            return unmapper([cord[0], cord[1] - 1], dim)

        else:

            return unmapper([cord[0] + 1, cord[1]], dim)


R0 = np.zeros([dim[0] * dim[1], 4])
goal = find_free(model)
R0[goal, 0] = 100

learning = QLearning(range(0, dim[0] * dim[1]), range(0, 4), R0, 0.5, 1)

s0 = goal

while s0 == goal:

    s0 = find_free(model)

plt.close()
plt.figure(figsize=(dim[1] / 2, dim[0]), facecolor='w')

for _ in range(100):

    preview = np.zeros(dim)

    state = s0
Example #29
        ma.update_state(card3, third)

        ##################################################################
        wp = ma.winning_card(ma.stack)
        points_dict[played[wp]] += ma.eval_stack(ma.stack)
        ma.clear_state()

    return points_dict


if __name__ == "__main__":

    #player can be anything from 1_1, 2_2, 3_3, random, LW, LB, LBB
    play_modes = ["1_1", "2_2", "3_3", "random", "LW", "LB", "LWW", "LBB"]
    qlearning = QLearning(0.1, 0.1, 0.1)

    for mode1 in play_modes:
        for mode2 in play_modes:
            for mode3 in play_modes:
                print(f"Working on: {mode1} {mode2} {mode3}")
                count = 0
                file = open(mode1 + " " + mode2 + " " + mode3, "w")

                s = ""
                while count < 1000:
                    print(count / 1000)

                    #init data to store into file
                    data = dict()
Example #30
def main():
    Default = 0
    QLearning = 1
    Genetic = 2

    C.initialize()

    #Initialize pygame and window surface.
    pygame.init()
    win = pygame.display.set_mode((C.WINDOW_WIDTH, C.WINDOW_HEIGHT))
    pygame.display.set_caption("Asteroids Genetic Algorithm")
    timer = pygame.time.Clock()

    #Initialize Q-Learning.
    if MODE == QLearning:
        QTRAINING = True
        Q.Q_Matrix = Q.initialize()
        actiontimer = 0
        action = 0
        currentaction = 0
        oldstateval = 0
        oldscore = 0
        prevQscore = 0

    #Initialize level one asteroids ().
    LEVEL = 1
    asteroids = []
    asteroids = generateAsteroids(asteroids, LEVEL)

    #Initialize scoreboard.
    SCORE = 0
    if C.DISPLAY_GAME:
        font = pygame.font.Font('Vector_Battle.ttf', 24)
        font.set_bold(True)
        show_score = font.render('SCORE: 0', True, C.WHITE, C.BLACK)
        scoreboard = show_score.get_rect()
        scoreboard.center = (150, 50)

    #Initialize player sprite.
    player = Player(C.WINDOW_WIDTH / 2, C.WINDOW_HEIGHT / 2, 0)
    if C.DISPLAY_GAME:
        ship = pygame.image.load(player.IMAGE)
        ship = pygame.transform.rotate(ship, -90)
        ship = pygame.transform.scale(ship, (C.PLAYERSIZE, C.PLAYERSIZE))

        #Initialize state display.
        show_state = font.render('State: ' + ' '.join(player.state), True,
                                 C.WHITE, C.BLACK)
        statedisplay = show_state.get_rect()
        statedisplay.center = (535, 150)

    #Initialize projectiles.
    projectiles = []

    #Initialize timers.
    respawntime = 0

    run = True

    if MODE == Genetic:
        population = [GA.random_chromosome() for _ in range(GA.PopulationSize)]
        fitness_scores = [0 for i in range(GA.PopulationSize)]
        for each in range(len(population)):
            fitness_scores[each] = simulate(newGameContainer(),
                                            population[each])
            print(fitness_scores[each])
        average = GA.average_fitness(fitness_scores)
        print("avg fitness: " + str(average))

        i = 0
        while i < GA.NumIterations:
            i += 1
            population, fitness_scores = GA.breed(population, fitness_scores)
            average = GA.average_fitness(fitness_scores)
            print("avg-fitness: " + str(average))
        best_chromosome = population[GA.best_solution(fitness_scores)]

    while run:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                run = False

        #If using Q-Learning, train the Q-Matrix when the action timer runs out.
        if MODE == QLearning:
            actiontimer += 1
            if actiontimer == C.FRAMES_PER_ACTION:
                actiontimer = 0
                reward = SCORE - oldscore
                oldscore = SCORE
                nextbest = Q.Q_Matrix[C.state.index(
                    player.state)][Q.greedy_choice(player.state)]
                Q.Q_Matrix[oldstateval][action] = prevQscore + C.stepsize * (
                    reward + C.discount * nextbest - prevQscore)
                oldstateval = C.state.index(player.state)
                action = Q.choose_action(player.state)
                prevQscore = Q.Q_Matrix[oldstateval][action]
                currentaction = C.actions[action]

        if MODE == Genetic:
            action = GA.updateAction(player, best_chromosome)
            executeAction(player, projectiles, action)

        #Choose an action, based on current key press or Q-Learning decision.
        keys = pygame.key.get_pressed()
        if (MODE == QLearning
                and currentaction == 'Left') or keys[pygame.K_LEFT]:
            player.rotation += 5
        if (MODE == QLearning
                and currentaction == 'Right') or keys[pygame.K_RIGHT]:
            player.rotation -= 5
        if (MODE == QLearning
                and currentaction == 'Thrust') or keys[pygame.K_UP]:
            if player.speed <= C.MAXSPEED: player.speed += C.THRUST
            del player.thrustvectors[0]
            player.thrustvectors.append([player.speed, player.rotation])
        if (MODE == QLearning
                and currentaction == 'Shoot') or keys[pygame.K_SPACE]:
            if not player.firing: projectiles.append(fireProjectile(player))
            player.firing = True
        if MODE == QLearning:
            if currentaction != 'Shoot': player.firing = False
        else:
            if not keys[pygame.K_SPACE]: player.firing = False

        #Update player, asteroids, projectiles, SCORE, LEVEL and state.
        rays = sense(player, asteroids)
        projectiles = detectProjectileColision(asteroids, projectiles)
        SCORE += updateScore(player, asteroids)
        player.score = SCORE
        updatePlayer(player)
        LEVEL = updateAsteroids(asteroids, LEVEL)
        updateProjectiles(projectiles)

        #Draw the game.
        if C.DISPLAY_GAME:
            drawGame(player, ship, asteroids, projectiles, scoreboard, SCORE,
                     statedisplay, rays, font, win)

        timer.tick(C.FPS)

    if MODE != 2: pygame.quit()
    if C.SAVEQMATRIX: saveQmatrix(Q.Q_Matrix)
Example #31
states_315 = np.concatenate(
    (states_315_train, states_315_test),
    axis = 0)
next_states_315 = np.concatenate(
    (next_states_315_train, next_states_315_test),
    axis = 0)
actions_315 = np.concatenate(
    (actions_315_train, actions_315_test),
    axis = 0)
rewards_315 = np.concatenate(
    (rewards_315_train, rewards_315_test),
    axis = 0)

trajectories_315 = trajectories_315_train + trajectories_315_test

# evaluation for q315
print '-------------------Evaluation for Q315--------------------------------'
state_rewards_315 = ql.estimate_rewards(next_states_315_train, actions_315_train, rewards_315_train, action_q315)
discounted_rewards_315 = ql.discount_rewards(state_rewards_315, discount)
discounted_max_states_315 = ql.get_max_reward_states(discounted_rewards_315)
max_states_315 = ql.get_max_reward_states(state_rewards_315)
q315_policy = QLearnedPolicy(discounted_max_states_315[0], q315_labels)

print 'Reward of max state = {a}, discounted max state = {b}'.format(
    a = state_rewards_315[max_states_315[0]], b = state_rewards_315[discounted_max_states_315[0]])
print 'Discounted reward of max state = {a}, discounted max state = {b}'.format(
    a = discounted_rewards_315[max_states_315[0]], b = discounted_rewards_315[discounted_max_states_315[0]])
print 'Max state actions: {a} \nDiscounted max state actions: {b}'.format(
    a = q315_labels[np.array(max_states_315[0]).astype(int) == 1], b = q315_labels[np.array(discounted_max_states_315[0]).astype(int) == 1])

action_counts_315 = defaultdict(lambda: defaultdict(int))
for s,a in zip(states_315,actions_315):
    action_counts_315[tuple(s)][a] += 1
Example #32
if __name__ == "__main__":
    args = sys.argv  # reading filename from command-line arguments
    argc = len(args)
    if (argc != 5):
        print(
            'usage : $ python', args[0],
            'maze_map(.txt file) episode(number) reward(0-1 float) punishment(0-1 float)'
        )
        exit()
    n_episode = int(args[2])  # the number of episodes
    reward = float(args[3])  # reward
    punishment = float(args[4])  # punishment

    # fix seed for reproducibility
    random.seed(0)
    np.random.seed(0)

    maze_map = Map.Map(args[1])
    maze_map.printmap()  # print maze (initial state)
    agent = Agent.Agent(maze_map)
    qtable = QTable.QTable(maze_map)
    qlearning = QLearning.QLearning(n_episode, reward, punishment)

    # execute QLearning
    qlearning.execute_qlearning(maze_map, agent, qtable)

    # printing result
    qtable.print_result(maze_map)

#end of program
Example #33
from QLearning import *
from ChainModel import *

ps = QLearning(SlipperyChainModel(0.1), 0.5, 0.8, 0)
for i in range(10000):
	print ps.next()
# expect state 5 to have the highest potential
# for state in ps.model.states:
# 	print ps.get_v(state)

# for i in range(1, 6):
# 	print "transition model"
# 	print ps.get_transition_table(ps.model.state[i], ps.model.act_a)
# 	print ps.get_transition_table(ps.model.state[i], ps.model.act_b)
# 	print "reward model"
# 	print ps.get_reward_table(ps.model.state[i], ps.model.act_a)
# 	print ps.get_reward_table(ps.model.state[i], ps.model.act_b)
Example #34
REFRESH_TIME = 40
SIZE = 480
tk_obj = Tk()

canvas = Canvas(tk_obj, background="white", width=SIZE, height=SIZE)

top_wall = canvas.create_rectangle((0, 0, SIZE, 10), fill="black")
bot_wall = canvas.create_rectangle((0, SIZE-10, SIZE, SIZE), fill="black")
computer = canvas.create_rectangle((SIZE-10, 240, SIZE, 144), fill="black")
player = canvas.create_rectangle((0, 240, 10, 144), fill="black")

ball = None
x_velocity = 14.4
y_velocity = 4.8

test_games = 0
learned = False

q_table = QLearning.get_table()
for i in range (12):
    for j in range(12):
        print(q_table[i][j])

canvas.pack()
reset_ball()

tk_obj.bind("<KeyPress-Up>", move_up)
tk_obj.bind("<KeyPress-Down>", move_down)

tk_obj.after(REFRESH_TIME, refresh)
tk_obj.mainloop()
Example #35
import gym

env = gym.make('CartPole-v0')

import random
import QLearning  # Your implementation goes here...
import Assignment7Support

discountRate = 0.98  # Controls the discount rate for future rewards -- this is gamma from 13.10
actionProbabilityBase = 1.8  # This is k from the P(a_i|s) expression from section 13.3.5 and influences how random exploration is
randomActionRate = 0.01  # Percent of time the next action selected by GetAction is totally random
learningRateScale = 0.01  # Should be multiplied by visits_n from 13.11.
trainingIterations = 20000

qlearner = QLearning.QLearning(
    stateSpaceShape=Assignment7Support.CartPoleStateSpaceShape(),
    numActions=env.action_space.n,
    discountRate=discountRate)

for trialNumber in range(trainingIterations):
    observation = env.reset()
    reward = 0
    for i in range(300):
        env.render()

        print("Iteration ", i)
        currentState = Assignment7Support.CartPoleObservationToStateSpace(
            observation)
        action = qlearner.GetAction(
            currentState,
            learningMode=True,
            randomActionRate=randomActionRate,