Example #1
    def __init__(self,
                 action_set,
                 learning_ratio=0.01,
                 gama=0.5,
                 epsilon=0.01,
                 load_from_file=False):
        self.action_set = action_set
        self.learning_ratio = learning_ratio
        self.gama = gama
        self.epsilon = epsilon
        self.filename = 'pong_agent_cmac'

        offsets = [8 * 1, 0, 8 * 3, 0, 0]
        dimensions = [
            Dimension(tile_width=8, minn=0, maxx=48),
            Dimension(tile_width=1, minn=0, maxx=2),
            Dimension(tile_width=8, minn=0, maxx=64),
            Dimension(tile_width=1, minn=0, maxx=2),
            Dimension(tile_width=1, minn=0, maxx=3)
        ]

        self.q_func = CMAC(offsets=offsets, dimensions=dimensions, n_tilings=4)

        if load_from_file:
            self.q_func.load_from_file(self.filename)
Example #2
 def testCmacComputation(self):
     key = [
         0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56, 0x78, 0x12, 0x34, 0x56,
         0x78, 0x12, 0x34, 0x56, 0x78
     ]
     cmac = CMAC(key)
     cmac.update([0xde, 0xad, 0xbe, 0xef])
     result = cmac.final()
     expectedResult = bytearray(
         [0xb5, 0xf3, 0xeb, 0x27, 0x15, 0x45, 0xe5, 0x55])
     self.assertEqual(result, expectedResult)
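For reference, the same key and message can also be fed to PyCryptodome's standard AES-CMAC implementation. This is only a sketch of the conventional API; the CMAC class under test above returns an 8-byte tag, so the two results are not expected to be identical.

from Crypto.Cipher import AES
from Crypto.Hash import CMAC as StdCMAC

key = bytes([0x12, 0x34, 0x56, 0x78] * 4)
mac = StdCMAC.new(key, ciphermod=AES)
mac.update(bytes([0xde, 0xad, 0xbe, 0xef]))
print(mac.hexdigest())  # standard 16-byte AES-CMAC tag, for comparison only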
Example #3
def aesEncrypt(key, data, mode='CMAC'):
    """AES encryption function
    
    Args:
        key (str): packed 128 bit key
        data (str): packed plain text data
        mode (str): Optional mode specification (CMAC)
        
    Returns:
        Packed encrypted data string
    """
    dataorder = 'big'
    keyorder = 'big'

    if mode == 'CMAC':
        cipher = CMAC()
        # there must be a better way to do this
        key = (int.from_bytes(key[0:4],
                              'big'), int.from_bytes(key[4:8], 'big'),
               int.from_bytes(key[8:12],
                              'big'), int.from_bytes(key[12:16], 'big'))
        if len(data) <= 16:
            length = len(data) * 8
            data = data + bytearray((16 - len(data)))
            data = [(int.from_bytes(data[0:4],
                                    'big'), int.from_bytes(data[4:8], 'big'),
                     int.from_bytes(data[8:12],
                                    'big'), int.from_bytes(data[12:16],
                                                           'big'))]
        elif len(data) <= 32:
            length = (len(data) - 16) * 8
            data = data + bytearray((32 - len(data)))
            data = [(int.from_bytes(data[0:4],
                                    'big'), int.from_bytes(data[4:8], 'big'),
                     int.from_bytes(data[8:12],
                                    'big'), int.from_bytes(data[12:16],
                                                           'big')),
                    (int.from_bytes(data[16:20],
                                    'big'), int.from_bytes(data[20:24], 'big'),
                     int.from_bytes(data[24:28],
                                    'big'), int.from_bytes(data[28:32],
                                                           'big'))]
        else:
            raise ValueError('Data greater than 32 bytes')
        mic = cipher.cmac(key, data, length)
        mic = mic[0].to_bytes(4, 'big')
        return mic

    else:
        try:
            cipher = AES.new(key, AES.MODE_ECB)
        except Exception:
            # fall back to AES implementations that are constructed directly
            cipher = AES(key, AES.MODE_ECB)
        return cipher.encrypt(data)
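A minimal usage sketch for the CMAC branch, assuming the custom CMAC class above is importable in the same module and that key and data are bytes-like, as the int.from_bytes calls imply:

key = bytes(range(16))            # hypothetical 128-bit key
payload = b'\xde\xad\xbe\xef'     # short message, handled by the <= 16 byte branch
mic = aesEncrypt(key, payload, mode='CMAC')
print(mic.hex())                  # 4-byte message integrity code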
Example #4
 def __init__(self, epsilon=0.1, alpha=0.1, gamma=0.9, decayRate=0.9, seed=12345,
              cmac_level=20, cmac_quantization=0.3, cmac_beta=0.1, port=12345,
              serverPath="/home/leno/HFO/bin/"):
     super(SARSA, self).__init__(seed, port, serverPath=serverPath)
     self.name = "SARSA"
     self.qTable = {}
     self.stateActionTrace = {}
     self.epsilon = epsilon
     self.alpha = alpha
     self.gamma = gamma
     self.decayRate = decayRate
     self.cmac = CMAC(cmac_level, cmac_quantization, cmac_beta)
Example #5
File: sarsa.py  Project: cowhi/HFO
 def __init__(self, epsilon=0.1, alpha=0.1, gamma=0.9, decayRate=0.9, seed=12345,
              cmac_level=20, cmac_quantization=0.3, cmac_beta=0.1, port=12345,
              serverPath="/home/leno/HFO/bin/"):
     super(SARSA, self).__init__(seed, port, serverPath=serverPath)
     self.name = "SARSA"
     self.qTable = {}
     self.stateActionTrace = {}
     self.epsilon = epsilon
     self.alpha = alpha
     self.gamma = gamma
     self.decayRate = decayRate
     self.cmac = CMAC(cmac_level, cmac_quantization, cmac_beta)
Example #6
def main():
    print('New agent online!')
    print('..... Initializing learning algorithm: SARSA')
    ACTIONS = [MOVE, SHOOT, PASS_CLOSE, PASS_FAR, DRIBBLE]
    SARSA = SARSA(ACTIONS)  # note: rebinds the class name to the agent instance
    print('..... Initializing discretization with CMAC')
    CMAC = CMAC(1, 0.5, 0.1)  # note: rebinds the class name to the CMAC instance used for discretization
    print('..... Loading HFO environment')
    hfo = HFOEnvironment()
    print('..... Connecting to HFO server')
    hfo.connectToServer(HIGH_LEVEL_FEATURE_SET,
                      'bin/teams/base/config/formations-dt', 6000,
                      'localhost', 'base_left', False)
    print('..... Start training')
    for episode in itertools.count():
        print('..... Starting episode %d' % episode)
        status = IN_GAME
        step = 0
        while status == IN_GAME:
            step += 1
            old_status = status
            # Get the vector of state features for the current state
            features = hfo.getState()
            state = transformFeatures(features)
            print('State: %s' % str(state))

            action = select_action(state)
            hfo.act(action)
            #print('Action: %s' % str(action))
            # Advance the environment and get the game status
            status = hfo.step()
            #print('Status: %s' % str(status))
            print('.......... Step %d: %s - %s - %s' % (step, str(old_status), str(action), str(status)))
        # Check the outcome of the episode
        print('..... Episode ended with %s' % hfo.statusToString(status))
        # Quit if the server goes down
        if status == SERVER_DOWN:
            hfo.act(QUIT)
            break
Example #7
 def __init__(self, gen_factor, num_weights):
     CMAC.__init__(self, gen_factor, num_weights)
Example #8
class PongAgent:
    def __init__(self,
                 action_set,
                 learning_ratio=0.01,
                 gama=0.5,
                 epsilon=0.01,
                 load_from_file=False):
        self.action_set = action_set
        self.learning_ratio = learning_ratio
        self.gama = gama
        self.epsilon = epsilon
        self.filename = 'pong_agent_cmac'

        offsets = [8 * 1, 0, 8 * 3, 0, 0]
        dimensions = [
            Dimension(tile_width=8, minn=0, maxx=48),
            Dimension(tile_width=1, minn=0, maxx=2),
            Dimension(tile_width=8, minn=0, maxx=64),
            Dimension(tile_width=1, minn=0, maxx=2),
            Dimension(tile_width=1, minn=0, maxx=3)
        ]

        self.q_func = CMAC(offsets=offsets, dimensions=dimensions, n_tilings=4)

        if load_from_file:
            self.q_func.load_from_file(self.filename)

    def __get_action_index(self, action):
        for x in range(0, len(self.action_set)):
            if action == self.action_set[x]:
                return x
        return None

    def __choose_best_action_index(self, state):
        best_state = PongState(state, 0)
        best_reward = self.q_func.get_weight(best_state)

        for a in range(1, len(self.action_set)):
            current_state = PongState(state, a)
            current_reward = self.q_func.get_weight(current_state)

            if current_reward > best_reward:
                best_state = current_state
                best_reward = current_reward

        return best_state.action_index

    def pick_action(self, state):
        best_action_index = self.__choose_best_action_index(state)

        if random() < self.epsilon:
            best_action_index = randint(0, len(self.action_set) - 1)

        return self.action_set[best_action_index]

    # TODO: Remake
    def update_q_function(self, state, action, reward, next_state):
        # Q-learning style update: bootstraps from the greedy action in next_state
        best_next_action_index = self.__choose_best_action_index(next_state)
        best_action_index = self.__get_action_index(action)

        pong_state = PongState(state, best_action_index)
        pong_next_state = PongState(next_state, best_next_action_index)

        now_q_func = self.q_func.get_weight(pong_state)
        next_q_func = self.q_func.get_weight(pong_next_state)
        expected_reward = reward + self.gama * next_q_func

        new_weight = now_q_func + self.learning_ratio * (expected_reward -
                                                         now_q_func)
        self.q_func.set_weight(pong_state, new_weight)

    def save_to_file(self):
        self.q_func.save_to_file(self.filename)
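Since the listing shows only the agent class, here is a hypothetical driver sketch. The RandomPongEnv stand-in and its reset/step interface are invented for illustration and are not part of the original project; only pick_action, update_q_function and save_to_file come from PongAgent itself, and the CMAC, Dimension and PongState helpers it relies on are assumed to be importable from the same project.

from random import random, randint

class RandomPongEnv:
    """Toy stand-in environment with a reset/step interface."""
    def reset(self):
        # four state values roughly matching the first four Dimension ranges above
        return (randint(0, 47), randint(0, 1), randint(0, 63), randint(0, 1))

    def step(self, action):
        next_state = self.reset()
        reward = random() - 0.5   # random reward, for illustration only
        done = random() < 0.05
        return next_state, reward, done

env = RandomPongEnv()
agent = PongAgent(action_set=[0, 1, 2])   # hypothetical discrete action ids

for episode in range(10):
    state = env.reset()
    done = False
    while not done:
        action = agent.pick_action(state)
        next_state, reward, done = env.step(action)
        agent.update_q_function(state, action, reward, next_state)
        state = next_state

agent.save_to_file()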
Example #9
def transformFeatures(features):
    ''' From continuous to discrete using CMAC '''
    data = []
    for feature in features:
        quantized_features = CMAC.quantize(feature)
        data.append(quantized_features)
    return data
Example #10
File: sarsa.py  Project: cowhi/HFO
class SARSA(Agent):

    def __str__(self):
        """ Overwrites the object.__str__ method.

        Returns:
            string (str): Important parameters of the object.
        """
        return "Agent: " + str(self.unum) + ", " + \
               "Type: " + str(self.name) + ", " + \
               "Training steps: " + str(self.training_steps_total) + ", " + \
               "Q-Table size: " + str(len(self.qTable))

    def __init__(self, epsilon=0.1, alpha=0.1, gamma=0.9, decayRate=0.9, seed=12345,
                 cmac_level=20, cmac_quantization=0.3, cmac_beta=0.1, port=12345,
                 serverPath="/home/leno/HFO/bin/"):
        super(SARSA, self).__init__(seed, port, serverPath=serverPath)
        self.name = "SARSA"
        self.qTable = {}
        self.stateActionTrace = {}
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.decayRate = decayRate
        self.cmac = CMAC(cmac_level, cmac_quantization, cmac_beta)
        #print('***** %s: Agent uses CMAC(%s,%s,%s)' % (str(self.unum),str(cmac_level), str(cmac_quantization), str(cmac_beta)))

    def quantize_features(self, features):
        """ Quantizes continuous features with the CMAC and flattens the result into a single tuple """
        quantVar = self.cmac.quantize(features)
        data = []
        #len(quantVar[0]) is the number of variables
        for i in range(0,len(quantVar[0])):
            #Transforms n tuples into a single array
            for var in quantVar:
                #copy each tuple value to the output
                data.append(var[i])
        #returns the output as a tuple
        return tuple(data)

    def advise_action(self, uNum, state):
        """Verifies whether the agent can advise a friend, and returns the action if possible"""
        return None #No advising

    def get_Q(self, state, action):
        return self.qTable.get((state, action), 0.0)

    def observe_reward(self,state,action,reward,statePrime):
        """ After executing an action, the agent is informed about the state-reward-state tuple """
        pass
    '''
    def observe_reward(self,state,action,reward,statePrime):
        """ After executing an action, the agent is informed about the state-action-reward-state tuple """
        if self.exploring:
            #Selects the action for the next state without exploration
            lastState = self.lastState
            self.exploring = False
            nextAction = self.select_action(statePrime)
            #Hereafter the self.lastState refers to statePrime
            #Executes Q-update
            self.learn(lastState,action,reward,self.lastState,nextAction)
            #turns on the exploration again
    '''

    def select_action(self, stateFeatures, state, noAdvice=False):
        """Executes the epsilon-greedy exploration strategy"""
        #stores last CMAC result
        #self.lastState = state
        # select applicable actions
        if stateFeatures[5] == 1: # State[5] is 1 when the player can kick the ball
            actions = [self.SHOOT, self.DRIBBLE, self.PASSfar, self.PASSnear]
        else:
            return self.MOVE
        # epsilon greedy action selection
        if self.exploring and random.random() < self.epsilon and not noAdvice:
            actionsRandom = [ self.SHOOT,self.DRIBBLE, self.DRIBBLE, self.SHOOT, self.PASSfar, self.PASSnear]
            return random.choice(actionsRandom)
        else:
            cmacState = self.quantize_features(state)
            qValues = [self.get_Q(cmacState, action) for action in actions]
            maxQ = max(qValues)
            count = qValues.count(maxQ)
            if count > 1: #and self.exploring:
                best = [i for i in range(len(actions)) if qValues[i] == maxQ]
                if not self.exploring:
                    return actions[best[0]]
                return actions[random.choice(best)]
            else:
                return actions[qValues.index(maxQ)]


    def learn(self, state1, action1, reward, state2, action2):
        qnext = self.get_Q(state2, action2)
        self.learn_Q(state1, action1, reward, reward + self.gamma * qnext)

    def learn_Q(self, state, action, reward, value):
        oldv = self.qTable.get((state, action), None)
        if oldv is None:
            self.qTable[(state, action)] = reward
        else:
            self.qTable[(state, action)] = oldv + self.alpha * (value - oldv)

    def step(self, state, action):
        """ Perform a complete training step """
        # perform action and observe reward & statePrime
        self.execute_action(action)
        status = self.hfo.step()
        stateFeatures = self.hfo.getState()
        statePrime = self.get_transformed_features(stateFeatures)
        stateQuantized = self.quantize_features(state)
        statePrimeQuantized = self.quantize_features(statePrime)
        reward = self.get_reward(status)
        # select actionPrime
        if self.exploring:
            actionPrime = self.select_action(stateFeatures, statePrime,False)
        else:
            actionPrime = self.select_action(stateFeatures, statePrime,True)

        if self.exploring:
            # calculate TDError
            TDError = reward + self.gamma * self.get_Q(statePrimeQuantized, actionPrime) - self.get_Q(stateQuantized, action)
            # update trace value
            self.stateActionTrace[(stateQuantized, action)] = self.stateActionTrace.get((stateQuantized, action), 0) + 1
            for stateAction in self.stateActionTrace:
                # update ALL Q-values and eligibility trace values
                self.qTable[stateAction] = self.qTable.get(stateAction, 0) + TDError * self.alpha * self.stateActionTrace.get(stateAction, 0)
                # decay the eligibility trace for this state-action pair
                self.stateActionTrace[stateAction] = self.gamma * self.decayRate * self.stateActionTrace.get(stateAction, 0)
            #self.learn(stateQuantized, action, reward,
            #           statePrimeQuantized, actionPrime)
            self.training_steps_total += 1
        if status != self.IN_GAME:
            self.stateActionTrace = {}
        return status, statePrime, actionPrime

    def setupAdvising(self, agentIndex, allAgents):
        """ This method is called in preparation for advising """
        pass
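A hypothetical episode driver for the SARSA agent above. It assumes the Agent base class from the same project exposes self.hfo, get_transformed_features and the IN_GAME / SERVER_DOWN status constants used inside step(); none of those names are defined in this listing.

import itertools

agent = SARSA(epsilon=0.1, alpha=0.1, gamma=0.9,
              cmac_level=20, cmac_quantization=0.3, cmac_beta=0.1)

for episode in itertools.count():
    status = agent.IN_GAME                      # assumed constant from the Agent base class
    stateFeatures = agent.hfo.getState()        # assumed HFO handle from the base class
    state = agent.get_transformed_features(stateFeatures)
    action = agent.select_action(stateFeatures, state)
    while status == agent.IN_GAME:
        status, state, action = agent.step(state, action)
    if status == agent.SERVER_DOWN:             # assumed constant from the Agent base class
        break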
Example #11
File: test_cmac.py  Project: bionicv/cmac
#! /usr/bin/env python
"""
Author: Jeremy M. Stober
Program: TEST_CMAC.PY
Date: Saturday, April 14 2012
Description: Test code for CMAC implementation.
"""

from cmac import CMAC
import numpy as np
import numpy.random as npr
import pdb
import pylab
from utils import dual_scatter, create_cluster_colors_rgb, lvl_scatter

c = CMAC(1,0.5,0.1)


# t = [[0.2,0.2],[.6,.6],[.2,.6],[.6,.2]]
# for i in t:
#     print i, c.quantize(i), c.quantize_alt(i), c.quantize_fast(i)

c = CMAC(3,0.15,0.1)

#pdb.set_trace()
#c.quantize_alt([0.05, 0.1])

data = []
for i in range(1000):
    t = npr.rand(2) * 2 - 1
    pts = c.quantize(t)
Example #12
from cmac import CMAC
import numpy as np
import numpy.random as npr

c = CMAC(5, 0.1, 0.1)

data = [0.5, 0.7, 0.1, 0, 1, 0.543]
#for i in range(1):
#    t = npr.rand(10) * 4 - 2
#    print(type(t))
#    print("t[%d]: %s" % (i,str(t)))
#    pts = c.quantize(t)
#    print("pts[%d]: %s" % (i,str(pts)))
#pts = c.quantize_alt(t)
#print("pts[%d]: %s" % (i,str(pts)))
#pts = c.quantize_fast(t)
#print("pts[%d]: %s" % (i,str(pts)))
#    data.append([t,pts])

print(data)

print(c.quantize(data))

#labels = [d[1][0] for d in data]

#print len(set(labels))
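A follow-up sketch in the spirit of the commented-out label counting above: quantize many random 2-D points and count the distinct codes produced. It assumes c.quantize returns a sequence of per-tiling coordinate tuples, which is how the SARSA example's quantize_features method treats its output.

import numpy.random as npr
from cmac import CMAC

c = CMAC(3, 0.15, 0.1)
codes = set()
for _ in range(1000):
    t = npr.rand(2) * 2 - 1
    codes.add(tuple(tuple(code) for code in c.quantize(t)))
print(len(codes))   # number of distinct CMAC codes hit by the random samples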
Example #13
class SARSA(Agent):

    def __str__(self):
        """ Overwrites the object.__str__ method.

        Returns:
            string (str): Important parameters of the object.
        """
        return "Agent: " + str(self.unum) + ", " + \
               "Type: " + str(self.name) + ", " + \
               "Training steps: " + str(self.training_steps_total) + ", " + \
               "Q-Table size: " + str(len(self.qTable))

    def __init__(self, epsilon=0.1, alpha=0.1, gamma=0.9, decayRate=0.9, seed=12345,
                 cmac_level=20, cmac_quantization=0.3, cmac_beta=0.1, port=12345,
                 serverPath="/home/leno/HFO/bin/"):
        super(SARSA, self).__init__(seed, port, serverPath=serverPath)
        self.name = "SARSA"
        self.qTable = {}
        self.stateActionTrace = {}
        self.epsilon = epsilon
        self.alpha = alpha
        self.gamma = gamma
        self.decayRate = decayRate
        self.cmac = CMAC(cmac_level, cmac_quantization, cmac_beta)
        #print('***** %s: Agent uses CMAC(%s,%s,%s)' % (str(self.unum),str(cmac_level), str(cmac_quantization), str(cmac_beta)))

    def quantize_features(self, features):
        """ Quantizes continuous features with the CMAC and returns the result as a tuple """
        quantVar = self.cmac.quantize(features)
  #      data = []
        data = quantVar
        #len(quantVar[0]) is the number of variables
#        for i in range(0,len(quantVar[0])):
            #Transforms n tuples into a single array
#            for var in quantVar:
                #copy each tuple value to the output
#                data.append(var[i])
        #returns the output as a tuple
        return tuple(data)

    def advise_action(self, uNum, state):
        """Verifies whether the agent can advise a friend, and returns the action if possible"""
        return None #No advising

    def get_Q(self, state, action):
        return self.qTable.get((state, action), 0.0)

    def observe_reward(self,state,action,reward,statePrime):
        """ After executing an action, the agent is informed about the state-reward-state tuple """
        pass
    '''
    def observe_reward(self,state,action,reward,statePrime):
        """ After executing an action, the agent is informed about the state-action-reward-state tuple """
        if self.exploring:
            #Selects the action for the next state without exploration
            lastState = self.lastState
            self.exploring = False
            nextAction = self.select_action(statePrime)
            #Hereafter the self.lastState refers to statePrime
            #Executes Q-update
            self.learn(lastState,action,reward,self.lastState,nextAction)
            #turns on the exploration again
    '''

    def select_action(self, stateFeatures, state, noAdvice=False):
        """Executes the epsilon-greedy exploration strategy"""
        #stores last CMAC result
        #self.lastState = state
        # select applicable actions
        if stateFeatures[5] == 1: # State[5] is 1 when the player can kick the ball
            actions = [self.SHOOT, self.DRIBBLE, self.PASSfar, self.PASSnear]
        else:
            return self.MOVE
        # epsilon greedy action selection
        if self.exploring and random.random() < self.epsilon and not noAdvice:
            actionsRandom = [ self.SHOOT,self.DRIBBLE, self.DRIBBLE, self.SHOOT, self.PASSfar, self.PASSnear]
            return random.choice(actionsRandom)
        else:
            cmacState = self.quantize_features(state)
            qValues = [self.get_Q(cmacState, action) for action in actions]
            maxQ = max(qValues)
            count = qValues.count(maxQ)
            if count > 1: #and self.exploring:
                best = [i for i in range(len(actions)) if qValues[i] == maxQ]
                if not self.exploring:
                    return actions[best[0]]
                return actions[random.choice(best)]
            else:
                return actions[qValues.index(maxQ)]


    def learn(self, state1, action1, reward, state2, action2):
        qnext = self.get_Q(state2, action2)
        self.learn_Q(state1, action1, reward, reward + self.gamma * qnext)

    def learn_Q(self, state, action, reward, value):
        oldv = self.qTable.get((state, action), None)
        if oldv is None:
            self.qTable[(state, action)] = reward
        else:
            self.qTable[(state, action)] = oldv + self.alpha * (value - oldv)

    def step(self, state, action):
        """ Perform a complete training step """
        # perform action and observe reward & statePrime
        self.execute_action(action)
        status = self.hfo.step()
        stateFeatures = self.hfo.getState()
        statePrime = self.get_transformed_features(stateFeatures)
        stateQuantized = self.quantize_features(state)
        statePrimeQuantized = self.quantize_features(statePrime)
        reward = self.get_reward(status)
        # select actionPrime
        if self.exploring:
            actionPrime = self.select_action(stateFeatures, statePrime,False)
        else:
            actionPrime = self.select_action(stateFeatures, statePrime,True)

        if self.exploring:
            # calculate TDError
            TDError = reward + self.gamma * self.get_Q(statePrimeQuantized, actionPrime) - self.get_Q(stateQuantized, action)
            # update trace value
            self.stateActionTrace[(stateQuantized, action)] = self.stateActionTrace.get((stateQuantized, action), 0) + 1
            for stateAction in self.stateActionTrace:
                # update ALL Q-values and eligibility trace values
                self.qTable[stateAction] = self.qTable.get(stateAction, 0) + TDError * self.alpha * self.stateActionTrace.get(stateAction, 0)
                # decay the eligibility trace for this state-action pair
                self.stateActionTrace[stateAction] = self.gamma * self.decayRate * self.stateActionTrace.get(stateAction, 0)
            #self.learn(stateQuantized, action, reward,
            #           statePrimeQuantized, actionPrime)
            self.training_steps_total += 1
        if status != self.IN_GAME:
            self.stateActionTrace = {}
        return status, statePrime, actionPrime

    def setupAdvising(self, agentIndex, allAgents):
        """ This method is called in preparation for advising """
        pass