def valueiteratingpolicy(n):
    v = initvalact.initvalact(n)
    value = v[0]
    action = v[1]
    reward = rewardsegregation.rewardsegregation(n, p, p1, encoder, ENClast)
    print reward
    test = ValueIteration.valueiteration(value, reward, action)
    policy = test[1]
    print policy
    raw = 0
    col = 0
    gotopos.gotopos(raw, col, p, p1, n)
    # 0 = up / 1 = down / 2 = left / 3 = right
    global val1
    val1 = pinSetup.valueRead_ON()
    while val1 == 0:  # run until the stop switch reads 1
        if action[raw][col] == 0:
            act.playAction(0, raw, col, n, p, p1)
            raw = raw - 1

        elif action[raw][col] == 1:
            act.playAction(1, raw, col, n, p, p1)
            raw = raw + 1

        elif action[raw][col] == 2:
            act.playAction(2, raw, col, n, p, p1)
            col = col - 1

        elif action[raw][col] == 3:
            act.playAction(3, raw, col, n, p, p1)
            col = col + 1
        val1 = pinSetup.valueRead_ON()
    if val1 == 1:
        print "Stop"
Example #2
def rewardsegregation(n, p, p1, encoder, ENClast):
    reward = generate_rewardmatrix.generate_rewardmatrix(n)
    for raw in range(0, n):
        for col in range(0, n):
            for action in action_select(raw, col, n):
                gotopos.gotopos(raw, col, p, p1, n)
                time.sleep(0.5)
                ENClast = encoder.getData()  # encoder reading before the move
                action_20.playAction(action, raw, col, n, p, p1)
                time.sleep(0.1)
                if action == 0 or action == 1:
                    ENClast = encoder.getData()  # for up/down moves, re-read the baseline just before ENC
                ENC = encoder.getData()  # encoder reading after the move
                reward[raw][col][action] = ENC - ENClast
    return reward
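generate_rewardmatrix.generate_rewardmatrix(n) is imported but not shown. Judging from how the reward is indexed (reward[raw][col][action] with four actions) and from the later check that a nonzero entry means "already measured", it is assumed to be an n x n grid of four zeros per cell; a minimal sketch:

def generate_rewardmatrix_sketch(n):
    # one slot per action (0 = up, 1 = down, 2 = left, 3 = right), initialised to 0
    return [[[0, 0, 0, 0] for _ in range(n)] for _ in range(n)]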
Example #3
def rewardsegregation(n, p, p1, encoder, ENClast):
    reward = generate_rewardmatrix.generate_rewardmatrix(n)
    for raw in range(0, n):
        for col in range(0, n):
            for action in action_select(raw, col, n):
                gotopos.gotopos(raw, col, p, p1, n)
                time.sleep(0.3)
                ENClast = encoder.getData()
                act.playAction(action, raw, col, n, p, p1)
                time.sleep(t)
                if action == 0 or action == 1:
                    ENClast = encoder.getData()
                ENC = encoder.getData()
                direction = pinSetup.valueRead_dir()
                signed_diff = ((-1) ** direction) * (ENC - ENClast)  # flip the sign according to the rotation direction
                print signed_diff
                reward[raw][col][action] = signed_diff
                time.sleep(0.05)
            time.sleep(0.1)
        time.sleep(0.1)
    return reward
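action_select(raw, col, n) is also used without being shown. The sketch below reconstructs it from the boundary handling in the Q-learning example further down (corner and edge cells exclude moves that would leave the n x n grid); the real helper may differ.

def action_select_sketch(raw, col, n):
    actions = []                 # 0 = up / 1 = down / 2 = left / 3 = right
    if raw > 0:
        actions.append(0)        # up, unless already in the top raw
    if raw < n - 1:
        actions.append(1)        # down, unless already in the bottom raw
    if col > 0:
        actions.append(2)        # left, unless already in the first column
    if col < n - 1:
        actions.append(3)        # right, unless already in the last column
    return actions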
Example #4
def qLearning(n, p, p1, encoder, ENClast):
    import qinitial
    v = initvalact.initvalact(n)
    Q = qinitial.qinitial(n)
    #Q = [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]] 
    a = v[1]
    # a = [[None, None, None], [None, None, None],  [None, None, None]]  # initializing action matrix
    # size = the shape of the Q matrix [number of raws, number of cols, number of actions per state]
    size = np.shape(Q)  # storing size of Q-matrix
    n = size[0]
    Qlast = generateDummy(Q)  # generating dummy of same size as Q to enter the while loop
    iteration = 0  # initializing the iteration
    reward = generate_rewardmatrix.generate_rewardmatrix(n)
    while qError(Q, Qlast) > 1.5 or Q == Qlast:  # loop while the Q error is above 1.5 or Q has not changed yet
        # the Q == Qlast term is needed because early on, if the reward is zero, the next step
        # would read error = 0 and that would drop us out of the loop too early
        iteration += 1  # incrementing the iteration counter
        Qlast = deepcopy(Q)  # copying Q to Qlast
        # state = a state picked at random every iteration, based on the Q size
        state = random.randint(1, size[0] * size[1])
        # temp = used to recover raw and column from the state number chosen above
        # state / number of columns gives the raw number:
        # for a 3x4 (raw x column) grid, states 1 to 4 are in raw 1 and states 5 to 8 are in raw 2
        # for raw 1 (states 1 to 4) / 4 (total columns), 0 < temp <= 1
        # for raw 2 (states 5 to 8) / 4 (total columns), 1 < temp <= 2
        temp = state / (size[1] * 1.0)  # defining a temporary variable
        if ((temp).is_integer()):
            raw = int(temp) - 1
        else:
            raw = int(temp)
        # temp = state modulo the total number of columns
        # for column 1 (states 1, 5, 9) % 4 (total columns), temp is 1 [i.e. col = 1 - 1 = 0]
        # for column 2 (states 2, 6, 10) % 4 (total columns), temp is 2 [i.e. col = 2 - 1 = 1]
        temp = state % size[1]
        col = temp - 1
        if col < 0:
            col = size[1] - 1
        else:
            pass
        gotopos.gotopos(raw, col, p, p1, n)  # to go to state that is selected randomly
        time.sleep(0.3)
        # ipdb.set_trace()
        for i in range(0, 20):
            # action selection according to the selected state
            if raw == 0 and col == 0:
                action = random.choice([1, 3])
            elif raw == 0 and (col == -1 or col == size[1]-1):
                action = random.choice([1, 2])
            elif raw == 0:
                action = random.choice([1, 2, 3])

            elif raw == size[0]-1 and col == 0:
                action = random.choice([0, 3])
            elif raw == size[0]-1 and (col == -1 or col == size[1]-1):
                action = random.choice([0, 2])
            elif raw == size[0]-1:
                action = random.choice([0, 2, 3])

            elif col == 0:
                action = random.choice([0, 1, 3])
            elif (col == -1 or col == size[1]-1):
                action = random.choice([0, 1, 2])

            else:
                action = random.randint(0, 3)  # cells where all four actions are possible

            # defining nextstate according to the chosen action
            if action == 0:  # Up movement
                nextstate = Q[raw-1][col]
                rawtemp = raw - 1  # raw of nextstep
                coltemp = col  # col of nextstep
            elif action == 1:  # Down movement
                nextstate = Q[raw+1][col]
                rawtemp = raw + 1  # raw of nextstep
                coltemp = col  # col of nextstep
            elif action == 2:  # Left movement
                nextstate = Q[raw][col-1]
                rawtemp = raw  # raw of nextstep
                coltemp = col - 1  # col of nextstep
            else:  # Right movement
                # ipdb.set_trace()
                nextstate = Q[raw][col+1]
                rawtemp = raw  # raw of nextstep
                coltemp = col + 1  # col of nextstep
            # ipdb.set_trace()
            # try executing the Q-iteration formula with no errors..
            '''
            _____ADD HERE____
            ACTION_PERFORMANCE FUNCTION
            UPDATE_REWARD FUNCTION
            '''
            ENClast = encoder.getData()
            act.playAction(action, raw, col, size[0], p, p1)
            time.sleep(0.1)
            if action == 0 or action == 1:
                ENClast = encoder.getData()
            ENC = encoder.getData()
            reward[raw][col][action] = ENC - ENClast
            # update_reward.update_reward(reward, raw, col, action, diff)

            try:
                Q[raw][col][action] = reward[raw][col][action] + gama * (max(nextstate))
		#print "Q", Q 
            # tracking if there is a type error (i.e. datatype missmatch) or not in above equation
            except TypeError as e:
                print("TypeError")
            raw = rawtemp
            col = coltemp
        print "qerror is", qError(Q,Qlast)
        print "reward is", reward
    # getting the appropriate action back from the given calculated values of Q matrix
    for r in range(0, size[0]):
        for c in range(0, size[1]):
            # ipdb.set_trace()
            a[r][c] = Q[r][c].index(max(Q[r][c]))
    # ipdb.set_trace()
    # function returns the Q matrix, the action matrix, and the number of iterations
    return Q, a, iteration
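The raw/col recovery above (state divided by the column count, then the remainder) can be checked against a simpler 0-indexed form. This helper is only an illustration, not part of the original module: with states numbered 1..raws*cols in raw-major order, divmod on (state - 1) yields the same cell.

def state_to_cell(state, cols):
    return divmod(state - 1, cols)   # (raw, col), both 0-indexed

# e.g. for a 3x4 grid: state 1 -> (0, 0), state 5 -> (1, 0), state 8 -> (1, 3)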
Example #5
def qLearning(n, p, p1, encoder, ENClast):
    epsilon = 0.7
    v = initvalact.initvalact(n)
    Q = qinitial.qinitial(n)
    '''Q = [[[0, 1, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0]],  # State1, State2, State3
         [[0, 1, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0]],  # State4, State5, State6
         [[1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 1, 0]]]  # State7, State8, State9'''
    kMatrix = qinitial.qinitial(n)
    Tr = qinitial.qinitial(n)
    a = v[1]
    size = np.shape(Q)  # storing size of Q-matrix
    n = size[0]
    Qlast = generateDummy(Q)  # generating dummy of same size as Q to enter the while loop
    iteration = 0  # initializing the iteration
    reward = generate_rewardmatrix.generate_rewardmatrix(n)
    state = random.randint(1, size[0] * size[1])
    while qError(Q, Qlast) > 1.5 or Q == Qlast or iteration <= 100:
        iteration += 1  # incrementing the iteration counter
        if iteration > 60:
            epsilon = 0.1
        Qlast = deepcopy(Q)  # copying Q to Qlast
        # state = a state picked at random before the loop, based on the Q size
        # temp = used to recover raw and column from that state number (done only on the first iteration)
        # state / number of columns gives the raw number:
        # for a 3x4 (raw x column) grid, states 1 to 4 are in raw 1 and states 5 to 8 are in raw 2
        # for raw 1 (states 1 to 4) / 4 (total columns), 0 < temp <= 1
        # for raw 2 (states 5 to 8) / 4 (total columns), 1 < temp <= 2
        if iteration == 1:
            temp = state / (size[1] * 1.0)  # defining a temporary variable
            if ((temp).is_integer()):
                raw = int(temp) - 1
            else:
                raw = int(temp)
            # temp = state modulo the total number of columns
            # for column 1 (states 1, 5, 9) % 4 (total columns), temp is 1 [i.e. col = 1 - 1 = 0]
            # for column 2 (states 2, 6, 10) % 4 (total columns), temp is 2 [i.e. col = 2 - 1 = 1]
            temp = state % size[1]
            col = temp - 1
            if col < 0:
                col = size[1] - 1
            else:
                pass
            gotopos.gotopos(raw, col, p, p1, n)  # go to the randomly selected state
            time.sleep(0.3)

        possibleActions = action_select(raw, col, n)
        probablity = epsilon_greedy_policy(Q[raw][col], possibleActions,
                                           epsilon)
        actionIndex = np.random.choice(len(probablity), p=probablity)
        action = possibleActions[actionIndex]
        # action selection according to the selected state
        '''
        -------------------------------------------------------------
        ***************REPLACED BY EPSILON GREEDY POLICY*************
        -------------------------------------------------------------
        if i < NumOfSelAct:
            possibleActions = action_select(raw, col, n)
            tempList = []
            for i in possibleActions:
                tempList.append(Q[raw][col][i])
            action = possibleActions[tempList.index(max(tempList))]
        else:
            possibleActions = action_select(raw, col, n)
            action = random.choice(possibleActions)
        -------------------------------------------------------------
        ***************REPLACED BY EPSILON GREEDY POLICY*************
        -------------------------------------------------------------
        '''

        # defining nextstate according to the chosen action
        if action == 0:  # Up movement
            nextstate = Q[raw - 1][col]
            rawtemp = raw - 1  # raw of nextstep
            coltemp = col  # col of nextstep
        elif action == 1:  # Down movement
            nextstate = Q[raw + 1][col]
            rawtemp = raw + 1  # raw of nextstep
            coltemp = col  # col of nextstep
        elif action == 2:  # Left movement
            nextstate = Q[raw][col - 1]
            rawtemp = raw  # raw of nextstep
            coltemp = col - 1  # col of nextstep
        else:  # Right movement
            nextstate = Q[raw][col + 1]
            rawtemp = raw  # raw of nextstep
            coltemp = col + 1  # col of nextstep
        # try executing the Q-iteration formula with no errors..
        '''
        _____ADD HERE____
        ACTION_PERFORMANCE FUNCTION
        UPDATE_REWARD FUNCTION
        '''
        ENClast = encoder.getData()
        act.playAction(action, raw, col, size[0], p, p1)
        time.sleep(0.1)
        if action == 0 or action == 1:
            ENClast = encoder.getData()
        ENC = encoder.getData()
        diff = ENC - ENClast
        oldreward = reward[raw][col][action]
        # redo the measurement once if the new reading disagrees with an earlier nonzero reward
        if (oldreward != 0 and diff == 0) or \
           (np.sign(oldreward) != np.sign(diff) and oldreward != 0):
            # restriCount[raw][col][action] += 1
            # if restriCount[raw][col][action] < 3:
            print("!! restriction applied !!")
            gotopos.gotopos(raw, col, p, p1, n)
            time.sleep(0.3)
            ENClast = encoder.getData()
            act.playAction(action, raw, col, size[0], p, p1)
            time.sleep(0.1)
            if action == 0 or action == 1:
                ENClast = encoder.getData()
            ENC = encoder.getData()
            diff = ENC - ENClast
        reward[raw][col][action] = diff
        # update_reward.update_reward(reward, raw, col, action, diff)

        kMatrix[raw][col][action] = kMatrix[raw][col][action] + 1

        try:
            alpha = 1.0 / ((kMatrix[raw][col][action]) ** bita)  # 1.0 keeps alpha a float under Python 2
            eComplement = reward[raw][col][action] + gama * max(nextstate) - Q[raw][col][action]
            e = reward[raw][col][action] + gama * max(nextstate) - max(Q[raw][col])
            for r in range(size[0]):
                for c in range(size[1]):
                    for actn in action_select(raw, col, n):
                        Tr[r][c][actn] = gama * lamda * Tr[r][c][actn]
                        Q[r][c][actn] = Q[r][c][actn] + alpha * Tr[r][c][actn] * e

            Q[raw][col][action] = Q[raw][col][action] + alpha * eComplement
            Tr[raw][col][action] += 1
        # tracking whether there is a type error (i.e. datatype mismatch) in the equation above
        except TypeError as e:
            print("TypeError")
        print possibleActions
        print probablity
        print "raw= ", raw, "col = ", col, "action = ", action
        raw = rawtemp
        col = coltemp
        print "qerror is", qError(Q, Qlast)
        print "reward is", reward
        print "iteration = ", iteration
    # getting the appropriate action back from the given calculated values of Q matrix
    print Tr
    print Q
    for r in range(0, size[0]):
        for c in range(0, size[1]):
            possibleActions = action_select(r, c, n)
            tempList = []
            for i in possibleActions:
                tempList.append(Q[r][c][i])
            a[r][c] = possibleActions[tempList.index(max(tempList))]
    # ipdb.set_trace()
    # function returns the Q matrix, the action matrix, and the number of iterations
    return Q, a, iteration
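epsilon_greedy_policy(Q[raw][col], possibleActions, epsilon) is called above but not shown. All the caller needs is a probability list aligned with possibleActions (it is fed to np.random.choice(..., p=...)). A common choice, sketched below under that assumption, gives the greedy action 1 - epsilon plus an equal share of epsilon and splits the rest uniformly; the project's version may weight things differently.

def epsilon_greedy_policy_sketch(qValues, possibleActions, epsilon):
    k = len(possibleActions)
    probs = [epsilon / float(k)] * k                       # explore: spread epsilon over all legal actions
    greedy = max(range(k), key=lambda i: qValues[possibleActions[i]])
    probs[greedy] += 1.0 - epsilon                         # exploit: most of the mass on the best-known action
    return probs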
Example #6
def qLearning(n, p, p1, encoder, ENClast):
    v = initvalact.initvalact(n)
    Q = qinitial.qinitial(n)
    kMatrix = qinitial.qinitial(n)
    restriCount = qinitial.qinitial(n)
    bita = 0.5  # learning-rate decay exponent (1/2 would be integer division = 0 under Python 2)
    # Q = [[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]]
    a = v[1]
    # a = [[None, None, None], [None, None, None],  [None, None, None]]  # initializing action matrix
    # size = the shape of the Q matrix [number of raws, number of cols, number of actions per state]
    size = np.shape(Q)  # storing size of Q-matrix
    n = size[0]
    Qlast = generateDummy(Q)  # generating dummy of same size as Q to enter the while loop
    iteration = 0  # initializing the iteration
    reward = generate_rewardmatrix.generate_rewardmatrix(n)
    # loop while the Q error is above 1.5, Q has not changed yet, or fewer than 4*n iterations have run (and only while the stop switch reads 0)
    global val1
    val1 = pinSetup.valueRead_ON()
    while (qError(Q, Qlast) > 1.5 or Q == Qlast or iteration <= 4*n) and (val1 == 0):
        # the Q == Qlast term is needed because early on, if the reward is zero, the next step
        # would read error = 0 and that would drop us out of the loop too early
        iteration += 1  # incrementing the iteration counter
        Qlast = deepcopy(Q)  # copying Q to Qlast
        # state = a state picked at random every iteration, based on the Q size
        state = random.randint(1, size[0] * size[1])
        # temp = used to recover raw and column from the state number chosen above
        # state / number of columns gives the raw number:
        # for a 3x4 (raw x column) grid, states 1 to 4 are in raw 1 and states 5 to 8 are in raw 2
        # for raw 1 (states 1 to 4) / 4 (total columns), 0 < temp <= 1
        # for raw 2 (states 5 to 8) / 4 (total columns), 1 < temp <= 2
        temp = state / (size[1] * 1.0)  # defining a temporary variable
        if ((temp).is_integer()):
            raw = int(temp) - 1
        else:
            raw = int(temp)
        # temp = state modulo the total number of columns
        # for column 1 (states 1, 5, 9) % 4 (total columns), temp is 1 [i.e. col = 1 - 1 = 0]
        # for column 2 (states 2, 6, 10) % 4 (total columns), temp is 2 [i.e. col = 2 - 1 = 1]
        temp = state % size[1]
        col = temp - 1
        if col < 0:
            col = size[1] - 1
        else:
            pass
        gotopos.gotopos(raw, col, p, p1, n)  # to go to state that is selected randomly
        time.sleep(0.3)
        # ipdb.set_trace()
        # NumOfSelAct = how many of the 20 inner steps below act greedily; it starts at (or below)
        # zero and grows towards 25 as iterations accumulate, so later sweeps exploit more often
        NumOfSelAct = 100 * (1 - 1 / (np.exp(0.05 * (iteration - 2))))
        NumOfSelAct = round(NumOfSelAct * 25 / 100)

        for i in range(0, 20):
            # action selection according to the selected state
            if i < NumOfSelAct:
                possibleActions = action_select(raw, col, n)
                tempList = []
                for j in possibleActions:
                    tempList.append(Q[raw][col][j])
                action = possibleActions[tempList.index(max(tempList))]
		print ("for i") , i, ("selected action is"), action
            else:
                possibleActions = action_select(raw, col, n)
                action = random.choice(possibleActions)

            # defining nextstate according to the chosen action
            if action == 0:  # Up movement
                nextstate = Q[raw-1][col]
                rawtemp = raw - 1  # raw of nextstep
                coltemp = col  # col of nextstep
            elif action == 1:  # Down movement
                nextstate = Q[raw+1][col]
                rawtemp = raw + 1  # raw of nextstep
                coltemp = col  # col of nextstep
            elif action == 2:  # Left movement
                nextstate = Q[raw][col-1]
                rawtemp = raw  # raw of nextstep
                coltemp = col - 1  # col of nextstep
            else:  # Right movement
                # ipdb.set_trace()
                nextstate = Q[raw][col+1]
                rawtemp = raw  # raw of nextstep
                coltemp = col + 1  # col of nextstep
            # ipdb.set_trace()
            # try executing the Q-iteration formula with no errors..
            '''
            _____ADD HERE____
            ACTION_PERFORMANCE FUNCTION
            UPDATE_REWARD FUNCTION
            '''
            ENClast = encoder.getData()
            act.playAction(action, raw, col, size[0], p, p1)
            time.sleep(0.1)
            if action == 0 or action == 1:
                ENClast = encoder.getData()
            ENC = encoder.getData()
            # quantize the encoder change to a reward magnitude in {-2, -1, 0, 1, 2}
            if ENC - ENClast > 0:
                diff = 1
                if ENC - ENClast > 1:
                    diff = 2
            elif ENC - ENClast < 0:
                diff = -1
                if ENC - ENClast < -1:
                    diff = -2
            else:
                diff = 0
            oldreward = reward[raw][col][action]
            # redo the measurement once if the new reading disagrees with the stored reward
            if (oldreward != 0 and diff == 0) or (np.sign(oldreward) != np.sign(diff)):
                # restriCount[raw][col][action] += 1
                # if restriCount[raw][col][action] < 3:
                print ("!! restriction applied !!")
                restriCount[raw][col][action] = 0
                gotopos.gotopos(raw, col, p, p1, n)
                time.sleep(0.3)
                ENClast = encoder.getData()
                act.playAction(action, raw, col, size[0], p, p1)
                time.sleep(0.1)
                if action == 0 or action == 1:
                    ENClast = encoder.getData()
                ENC = encoder.getData()
                diff = ENC - ENClast
            direction = pinSetup.valueRead_dir()
            reward[raw][col][action] = ((-1) ** direction) * diff
            # update_reward.update_reward(reward, raw, col, action, diff)
            kMatrix[raw][col][action] = kMatrix[raw][col][action] + 1
            try:
                alpha = 1.0 / ((kMatrix[raw][col][action]) ** bita)
                Q[raw][col][action] = (1-alpha) * Q[raw][col][action] + \
                    alpha * (reward[raw][col][action] + gama * (max(nextstate)))
                # print "Q", Q
            # tracking whether there is a type error (i.e. datatype mismatch) in the equation above
            except TypeError as e:
                print("TypeError")
            raw = rawtemp
            col = coltemp
	print "iteration is", iteration
        print "qerror is", qError(Q, Qlast)
        print "reward is", reward
	val1 = pinSetup.valueRead_ON()
    # getting the appropriate action back from the given calculated values of Q matrix
    if val1 == 1:
        #import os
        print "Stop"
        #os.system("shutdown now")
    for r in range(0, size[0]):
        for c in range(0, size[1]):
            # ipdb.set_trace()
            possibleActions = action_select(r, c, n)
            tempList = []
            for i in possibleActions:
                tempList.append(Q[r][c][i])
            a[r][c] = possibleActions[tempList.index(max(tempList))]
    # ipdb.set_trace()
    # function returns the Q matrix, the action matrix, and the number of iterations
    print kMatrix
#    print NumOfSelAct
    return Q, a, iteration
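generateDummy() and qError() drive the stopping test in every Q-learning example but are not shown here. generateDummy() only needs to return a same-shaped matrix that differs from Q so the first qError() check passes, and qError() is assumed to be a total absolute difference between the two matrices; both sketches below are assumptions about helpers the examples import.

def generateDummy_sketch(Q):
    # same shape as Q, but offset so Q != Qlast on the first pass
    return [[[q + 1 for q in cell] for cell in row] for row in Q]

def qError_sketch(Q, Qlast):
    # elementwise absolute difference, summed over every state/action slot
    err = 0.0
    for r in range(len(Q)):
        for c in range(len(Q[r])):
            for a in range(len(Q[r][c])):
                err += abs(Q[r][c][a] - Qlast[r][c][a])
    return err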
Example #7
pinVar = pinSetup.pinSetup()
p = pinVar[0]
p1 = pinVar[1]
encoder = pinVar[2]
ENClast = pinVar[3]
i=1
'''
try:
    while True:
	ENC = encoder.getData()
        print ENC
        time.sleep(0.05)
except KeyboardInterrupt:
    print('interrupted!')
'''

p.start(4.0)
p1.start(6.5)
gotopos.gotopos(2, 2, p, p1, 3)  # n = 3, matching the n passed to playAction below
encoder.setData(0)
time.sleep(2.0)
action_20.playAction(1, 1, 1, 3, p, p1)
time.sleep(0.25)
ENC = encoder.getData()
print ENC
action_20.playAction(2, 2, 1, 3, p, p1)
time.sleep(0.25)
ENC = encoder.getData()
print ENC
time.sleep(2.05)
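The test script above leaves the PWM outputs running when it ends or is interrupted. Assuming p and p1 are RPi.GPIO-style PWM objects (they expose start(), so stop() should be available too), a try/finally variant makes sure the servo signals are shut off; this is a sketch, not part of the original script.

try:
    p.start(4.0)
    p1.start(6.5)
    gotopos.gotopos(2, 2, p, p1, 3)
    encoder.setData(0)
    time.sleep(2.0)
    action_20.playAction(1, 1, 1, 3, p, p1)
    time.sleep(0.25)
    print encoder.getData()
finally:
    p.stop()    # assumption: the PWM objects provide stop()
    p1.stop()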