Пример #1
0
def run_games(learner, hist, iters=100, t_len=100):
    """
    Let `learner` play `iters` consecutive games and record every final score.

    After each game the game's score is appended to `hist` and the learner is
    reset. `t_len` is handed to the game as its tick length. Returns None.
    """
    for epoch in range(iters):
        # A fresh, silent game per epoch; the epoch number is shown on screen.
        game = SwingyMonkey(
            sound=False,
            text="Epoch %d" % (epoch),
            tick_length=t_len,
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback,
        )

        # Step the game until it reports it is over, mirroring the newest
        # game state onto the learner after every tick.
        while game.game_loop():
            learner.last_state = game.get_state()

        hist.append(game.score)
        learner.reset()
Пример #2
0
def evaluate(gamma=0.4, iters=100, chatter=True):
    """
    Train a TDValueLearner for `iters` games and return the negated mean score.

    gamma   -- value assigned to the learner's `gamma` attribute.
    iters   -- number of games to play.
    chatter -- when true, print epoch, score, best score and running average
               after every game.

    The running average is negated on return, so a better average score
    produces a smaller return value.
    """
    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0

    for ii in range(iters):

        # Exploration rate decays as 1, 1/2, 1/3, ...  The float literal keeps
        # this a true division on Python 2, where 1/(ii+1) floors to 0 from
        # the second game onward.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,            # Don't play sounds.
                             text="Epoch %d" % (ii), # Display the epoch on screen.
                             tick_length=1,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max(highscore, score)
        # Incremental mean over the ii + 1 games played so far.
        avgscore = (ii * avgscore + score) / (ii + 1)

        if chatter:
            # A single pre-formatted string prints the same text whether
            # print is a statement (Python 2) or a function (Python 3).
            print("%s %s %s %s" % (ii, score, highscore, avgscore))

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
Пример #3
0
def evaluate(gamma=0.4, iters=100, chatter=True):
    """
    Train a TDValueLearner for `iters` games and return the negated mean score.

    gamma   -- value assigned to the learner's `gamma` attribute.
    iters   -- number of games to play.
    chatter -- when true, print epoch, score, best score and running average
               after every game.

    The running average is negated on return, so a better average score
    produces a smaller return value.
    """
    learner = TDValueLearner()
    learner.gamma = gamma

    highscore = 0
    avgscore = 0.0

    for ii in range(iters):

        # Exploration rate decays as 1, 1/2, 1/3, ...  The float literal keeps
        # this a true division on Python 2, where 1 / (ii + 1) floors to 0
        # from the second game onward.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highscore = max(highscore, score)
        # Incremental mean over the ii + 1 games played so far.
        avgscore = (ii * avgscore + score) / (ii + 1)

        if chatter:
            # A single pre-formatted string prints the same text whether
            # print is a statement (Python 2) or a function (Python 3).
            print("%s %s %s %s" % (ii, score, highscore, avgscore))

        # Reset the state of the learner.
        learner.reset()

    return -avgscore
Пример #4
0
def testgame(iters=100, show=True):
    """
    Train a QLearner2 for `iters` games and log per-epoch statistics to CSV.

    The learner runs with alpha = 0.2 and gamma = 0.6. "test_Q2.csv" is
    (re)created with a header row, then one row is appended after every game.

    iters -- number of games to play.
    show  -- when truthy, also print the per-epoch statistics.

    Returns (avgscore, highestscore, score): the running average score, the
    best score seen, and the score of the last game. All three are 0 when no
    game was played.
    """
    learner = QLearner2()
    learner.alpha = 0.2
    learner.gamma = 0.6
    alpha = learner.alpha
    gamma = learner.gamma

    log_path = "test_Q2.csv"
    highestscore = 0
    avgscore = 0
    # Pre-bound so the final return is valid even when iters == 0.
    score = 0

    # Start a fresh log containing only the header.
    with open(log_path, "w", newline='') as csvfile:
        csv.writer(csvfile).writerow(
            ["alpha", "gamma", "epoch", "highest", "average", "score", "q"])

    try:
        for ii in range(iters):

            # Exploration rate decays as 1, 1/2, 1/3, ...
            learner.epsilon = 1 / (ii + 1)

            # Make a new monkey object.
            swing = SwingyMonkey(
                sound=False,  # Don't play sounds.
                text="Epoch %d" % (ii),  # Display the epoch on screen.
                tick_length=1,  # Make game ticks super fast.
                action_callback=learner.action_callback,
                reward_callback=learner.reward_callback)

            # Loop until you hit something.
            while swing.game_loop():
                pass

            score = swing.get_state()['score']
            highestscore = max(highestscore, score)
            # Incremental mean over the ii + 1 games played so far.
            avgscore = (ii * avgscore + score) / (ii + 1)
            # Percentage of non-zero entries in the Q matrix, 3 decimals.
            q = round(
                float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

            if show:
                print("epoch:", ii, "highest:", highestscore, "current score:",
                      score, "average:", avgscore, "% of Q mx filled:", q)

            # Re-open in append mode each epoch so the row is written out and
            # the file closed as soon as the game ends.
            with open(log_path, "a+", newline='') as csvfile:
                csv.writer(csvfile).writerow(
                    [alpha, gamma, ii, highestscore, avgscore, score, q])

            # Reset the state of the learner.
            learner.reset()
    finally:
        # Runs even if a game raised, so pg is always shut down.
        pg.quit()

    return avgscore, highestscore, score
Пример #5
0
def testgame(iters=100,show=True):
    """
    Train a QLearner2 for `iters` games, pickling the statistics every epoch.

    After each game the accumulated history (epoch, best score, running
    average, score, Q fill percentage) is written to "record12.p",
    overwriting the previous dump.

    iters -- number of games to play.
    show  -- when truthy, also print the per-epoch statistics.

    Returns (avgscore, highestscore, score): the running average score, the
    best score seen, and the score of the last game. They are 0.0, 0 and 0
    when no game was played.
    """
    learner = QLearner2()

    highestscore = 0
    # Float start value: keeps the running mean fractional on Python 2, where
    # int / int would truncate it at every step.
    avgscore = 0.0
    # Pre-bound so the final return is valid even when iters == 0.
    score = 0
    record = {key: [] for key in ('epoch', 'highest', 'avg', 'score', 'q')}

    for ii in range(iters):

        # Exploration rate decays as 1, 1/2, 1/3, ...  The float literal keeps
        # this a true division on Python 2, where 1/(ii+1) floors to 0 from
        # the second game onward.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(sound=False,            # Don't play sounds.
                             text="Epoch %d" % (ii), # Display the epoch on screen.
                             tick_length=1,          # Make game ticks super fast.
                             action_callback=learner.action_callback,
                             reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max(highestscore, score)
        # Incremental mean over the ii + 1 games played so far.
        avgscore = (ii * avgscore + score) / (ii + 1)
        # Percentage of non-zero entries in the Q matrix, 3 decimals.
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            # Joining into one string prints the same space-separated text
            # whether print is a statement (Python 2) or a function (Python 3).
            print(" ".join(str(item) for item in (
                "epoch:", ii, "highest:", highestscore, "current score:",
                score, "average:", avgscore, "% of Q mx filled:", q)))

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)

        # The with-block closes the file handle after every dump.
        with open("record12.p", "wb") as dump_file:
            pickle.dump(record, dump_file)

        # Reset the state of the learner.
        learner.reset()

    return avgscore,highestscore,score
Пример #6
0
def testgame(iters=100, show=True):
    """
    Train a QLearner2 for `iters` games, pickling the statistics every epoch.

    After each game the accumulated history (epoch, best score, running
    average, score, Q fill percentage) is written to "record12.p",
    overwriting the previous dump.

    iters -- number of games to play.
    show  -- when truthy, also print the per-epoch statistics.

    Returns (avgscore, highestscore, score): the running average score, the
    best score seen, and the score of the last game. They are 0.0, 0 and 0
    when no game was played.
    """
    learner = QLearner2()

    highestscore = 0
    # Float start value: keeps the running mean fractional on Python 2, where
    # int / int would truncate it at every step.
    avgscore = 0.0
    # Pre-bound so the final return is valid even when iters == 0.
    score = 0
    record = {key: [] for key in ('epoch', 'highest', 'avg', 'score', 'q')}

    for ii in range(iters):

        # Exploration rate decays as 1, 1/2, 1/3, ...  The float literal keeps
        # this a true division on Python 2, where 1 / (ii + 1) floors to 0
        # from the second game onward.
        learner.epsilon = 1.0 / (ii + 1)

        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=1,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something.
        while swing.game_loop():
            pass

        score = swing.get_state()['score']
        highestscore = max(highestscore, score)
        # Incremental mean over the ii + 1 games played so far.
        avgscore = (ii * avgscore + score) / (ii + 1)
        # Percentage of non-zero entries in the Q matrix, 3 decimals.
        q = round(float(np.count_nonzero(learner.Q)) * 100 / learner.Q.size, 3)

        if show:
            # Joining into one string prints the same space-separated text
            # whether print is a statement (Python 2) or a function (Python 3).
            print(" ".join(str(item) for item in (
                "epoch:", ii, "highest:", highestscore, "current score:",
                score, "average:", avgscore, "% of Q mx filled:", q)))

        record['epoch'].append(ii)
        record['highest'].append(highestscore)
        record['avg'].append(avgscore)
        record['score'].append(score)
        record['q'].append(q)

        # The with-block closes the file handle after every dump.
        with open("record12.p", "wb") as dump_file:
            pickle.dump(record, dump_file)

        # Reset the state of the learner.
        learner.reset()

    return avgscore, highestscore, score
Пример #7
0
    # Body of a per-epoch loop whose header lies outside this fragment.
    # The value depends only on learning_rate_start and iters, so it is the
    # same on every pass.
    # NOTE(review): on Python 2 `iters / 100` is integer division when iters
    # is an int, which makes the divisor 0 for iters < 100 -- confirm intent.
    # NOTE(review): nothing in this fragment reads learning_rate afterwards;
    # verify where (or whether) the learner picks it up.
    learning_rate = (learning_rate_start + .5) / (iters / 100)

    # Make a new monkey object.
    swing = SwingyMonkey(
        sound=False,  # Don't play sounds.
        text="Epoch %d" % (ii),  # Display the epoch on screen.
        tick_length=1,  # Make game ticks super fast.
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Record the final score of this game. (Original note: these values feed
    # the min/max calculation of find_state_bounds, which only has to be run
    # once because it saves its results.)
    scorelist.append(swing.get_state()['score'])

    #print swing.get_state()
    # Reset the state of the learner.
    learner.reset()

# Report the mean final score over all recorded games.
print numpy.average(scorelist)

# The function below finds the min/max values of each variable to determine the bin ranges.
#def find_state_bounds():

# print scorelist
# seq_tree = [x['tree'] for x in scorelist]
# seq_tree_min= min(seq_tree)
# seq_tree_max= max(seq_tree)
Пример #8
0
# Play `iters` games, collecting the final score of each one in scorelist.
for ii in range(iters):
    # The value is the same on every pass; `//` spells out the floor division
    # that `/` already performs on two ints under Python 2.
    # NOTE(review): the divisor is 0 whenever iters < 100 -- confirm intent.
    learning_rate = (learning_rate_start + .5) / (iters // 100)

    # Fresh silent game with the fastest tick; the epoch is shown on screen.
    swing = SwingyMonkey(
        sound=False,
        text="Epoch %d" % (ii),
        tick_length=1,
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback,
    )

    # Run the game to completion.
    while swing.game_loop():
        pass

    # Keep the final score of this game. (Original note: these values feed
    # the min/max calculation of find_state_bounds, which only has to be run
    # once because it saves its results.)
    final_state = swing.get_state()
    scorelist.append(final_state['score'])

    # Prepare the learner for the next game.
    learner.reset()

# Report the mean final score over all games played.
print(numpy.average(scorelist))



# The function below finds the min/max values of each variable to determine the bin ranges.
#def find_state_bounds():

    # print scorelist
    # seq_tree = [x['tree'] for x in scorelist]
Пример #9
0
iters = 10000
learner = Learner()
reward = []   # last reward of every game
score = []    # final score of every game
score_cur = 0
ii = 0

# Keep playing new games until a single game reaches a score of 100 or more.
# NOTE: `iters` does not bound this loop; it is left over from an earlier
# fixed-length for-loop.
while score_cur < 100:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=0,          # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass
    reward.append(learner.last_reward)

    # Read the final score once and reuse it for the history and the banner.
    score_cur = swing.get_state()["score"]
    score.append(score_cur)

    # A single parenthesised string prints the same text whether print is a
    # statement (Python 2) or a function (Python 3).
    print("################### Score = " +
          str(score_cur) + " ########################")
    # Reset the state of the learner.
    learner.reset()

Пример #10
0
def _state_features(state, gravity):
    """Flatten a game state into [tree values..., monkey values..., gravity]."""
    # list() is required on Python 3, where dict.values() returns a view that
    # does not support `+`; on Python 2 it is a harmless copy.
    return list(state['tree'].values()) + list(state['monkey'].values()) + [gravity]


def _q_targets(learner, states, actions, rewards):
    """Build the Q-learning regression target for every sample but the last."""
    targets = []
    for k in range(len(states) - 1):
        # Current estimate Q(s_k, a_k); predicted once and reused below.
        q_now = learner.model.predict(np.append(states[k], actions[k]))
        # Best estimate reachable from the following sample.
        q_next = np.max([
            learner.model.predict(np.append(states[k + 1], int(action)))
            for action in learner.actions])
        targets.append(
            q_now + learner.alpha *
            (rewards[k] + learner.gamma * q_next - q_now))
    return np.array(targets)


def run_games(learner, hist, iters=100, t_len=100):
    '''
    Driver function to simulate learning by having the agent play a sequence of games.

    After every game an ExtraTreesRegressor (50 trees) is refit on all
    samples gathered so far and installed as learner.model. The first game
    fits directly on the observed rewards; later games fit on Q-learning
    targets computed from the current model. learner.epsilon is lowered by
    0.05 on every tenth game (including the first), each final score is
    appended to hist, and the learner is reset between games. Returns None.

    NOTE(review): samples of consecutive games are concatenated, so the
    target for the last sample of one game bootstraps from the first sample
    of the next game -- confirm this is intended.
    '''
    net_states = []
    net_rewards = []
    net_actions = []
    for ii in range(iters):
        # Make a new monkey object.
        swing = SwingyMonkey(
            sound=False,  # Don't play sounds.
            text="Epoch %d" % (ii),  # Display the epoch on screen.
            tick_length=t_len,  # Make game ticks super fast.
            action_callback=learner.action_callback,
            reward_callback=learner.reward_callback)

        # Loop until you hit something, recording one sample per tick.
        iter_states = []
        iter_rewards = []
        iter_actions = []
        while swing.game_loop():
            state = swing.get_state()
            iter_states.append(
                np.array(_state_features(state, learner.gravity)))
            iter_rewards.append(learner.last_reward)
            iter_actions.append(int(learner.last_action))
            # From the second tick on, let the learner try to work out the
            # gravity until it reports success.
            if len(iter_states) > 1 and not learner.know_gravity:
                learner.learn_gravity(iter_states, iter_actions)
                if learner.know_gravity:
                    # Back-fill the gravity slot of the samples recorded
                    # before the value was known.
                    for features in iter_states:
                        features[-1] = learner.gravity

        # Record the state the game ended in as well, as an array like the
        # per-tick samples.
        state = swing.get_state()
        iter_states.append(np.array(_state_features(state, learner.gravity)))
        iter_rewards.append(learner.last_reward)
        iter_actions.append(int(learner.last_action))

        # Adding to the net training set
        net_states += iter_states
        net_rewards += iter_rewards
        net_actions += iter_actions

        if ii == 0:
            # No model exists yet: regress directly on the observed rewards.
            xtrain = build_training_set(net_states, net_actions)
            ytrain = np.array(net_rewards)
        else:
            # The last sample has no successor, so it is left out.
            xtrain = build_training_set(net_states[:-1], net_actions[:-1])
            ytrain = _q_targets(learner, net_states, net_actions, net_rewards)

        RF = ExtraTreesRegressor(n_estimators=50)
        RF.fit(xtrain, ytrain)

        learner.model = RF
        learner.model_trained = True

        if ii % 10 == 0:
            learner.epsilon -= 0.05

        # Save score history.
        hist.append(swing.score)

        # Reset the state of the learner.
        learner.reset()
Пример #11
0
iters = 10000
learner = Learner()
reward = []   # last reward of every game
score = []    # final score of every game
score_cur = 0
ii = 0

# Keep playing new games until a single game reaches a score of 100 or more.
# NOTE: `iters` does not bound this loop; it is left over from an earlier
# fixed-length for-loop.
while score_cur < 100:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(sound=False,            # Don't play sounds.
                         text="Epoch %d" % (ii), # Display the epoch on screen.
                         tick_length=0,          # Make game ticks super fast.
                         action_callback=learner.action_callback,
                         reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass
    reward.append(learner.last_reward)

    # Read the final score once and reuse it for the history and the banner.
    score_cur = swing.get_state()["score"]
    score.append(score_cur)

    # A single parenthesised string prints the same text whether print is a
    # statement (Python 2) or a function (Python 3).
    print("################### Score = " +
          str(score_cur) + " ########################")
    # Reset the state of the learner.
    learner.reset()

Пример #12
0
# Play games until the epoch counter reaches 1e5, collecting per-game
# statistics in lists that are defined outside this fragment.
while ii < 1e5:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(
        sound=False,  # Don't play sounds.
        text="Epoch %d" % (ii),  # Display the epoch on screen.
        tick_length=0,  # Make game ticks super fast.
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback,
    )

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Per-game statistics taken from the finished game and the learner.
    reward.append(learner.last_reward)
    score_cur = swing.get_state()["score"]
    veloc_cur = swing.get_state()["monkey"]["vel"]
    result_cur = learner.result_callback()
    qnorm = np.linalg.norm(learner.Q)
    score.append(score_cur)
    state_grid.append(learner.state_grid)
    state_num.append(learner.state_num)

    result.append(result_cur)
    Qnorm.append(qnorm)

    # Number of non-zero Q entries, and number of Q entries above -inf.
    # Neither value is read again within this fragment.
    State = np.sum(learner.Q != 0)
    totalState = np.sum(learner.Q > -np.inf)

    # Back up the Q matrix every 50 epochs. ii has already been incremented,
    # so `ii > 0` holds provided ii started at 0 or above.
    if ii > 0 and ii % 50 == 0:
        np.save(data_dir + "Qmat_backup.npy", learner.Q)
Пример #13
0
#while score_cur < 5000:
# Play games until the epoch counter reaches 1e5, collecting per-game
# statistics in lists that are defined outside this fragment.
while ii < 1e5:
    ii += 1
    # Make a new monkey object.
    swing = SwingyMonkey(
        sound=False,  # Don't play sounds.
        text="Epoch %d" % (ii),  # Display the epoch on screen.
        tick_length=0,  # Make game ticks super fast.
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback)

    # Loop until you hit something.
    while swing.game_loop():
        pass

    # Per-game statistics taken from the finished game and the learner.
    reward.append(learner.last_reward)
    score_cur = swing.get_state()["score"]
    veloc_cur = swing.get_state()["monkey"]["vel"]
    result_cur = learner.result_callback()
    qnorm = np.linalg.norm(learner.Q)
    score.append(score_cur)
    state_grid.append(learner.state_grid)
    state_num.append(learner.state_num)

    result.append(result_cur)
    Qnorm.append(qnorm)

    # Number of non-zero Q entries, and number of Q entries above -inf.
    # Neither value is read again within this fragment.
    State = np.sum(learner.Q != 0)
    totalState = np.sum(learner.Q > -np.inf)

    # Back up the Q matrix every 50 epochs. ii has already been incremented,
    # so `ii > 0` holds provided ii started at 0 or above.
    if ii > 0 and ii % 50 == 0:
        np.save(data_dir + "Qmat_backup.npy", learner.Q)
        return self.last_action

    def reward_callback(self, reward):
        """Store the given reward on the instance as `last_reward`."""
        self.last_reward = reward

# Experiment settings: number of games, then the arguments handed to Learner.
iters = 100
nvars, nstates = 3, 10
alpha, gamma, epsil = 0.2, 0.9, 0.1
learner = Learner(nvars, nstates, alpha, gamma, epsil)

for ii in range(iters):

    # Fresh silent game with a short tick; the epoch is shown on screen.
    swing = SwingyMonkey(
        sound=False,
        text="Epoch %d" % (ii),
        tick_length=1,
        action_callback=learner.action_callback,
        reward_callback=learner.reward_callback,
    )

    # Run the game to completion, dumping the game state after every tick.
    while swing.game_loop():
        print(swing.get_state())

    # Prepare the learner for the next game.
    learner.reset()