Example #1
 def __init__(self):
     self.ai = qlearn.QLearn(actions=range(directions),
                             epsilon=0.1,
                             alpha=0.1,
                             gamma=0.9)
     self.lastAction = None
     self.score = 0
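The snippets on this page only show call sites of qlearn.QLearn, not the class itself. As a point of reference, a minimal tabular Q-learning sketch with the constructor arguments used above (actions, epsilon, alpha, gamma) could look like the following; the chooseAction/learn method names and the dictionary Q-table are assumptions, not the original qlearn module.

import random

class QLearn:
    # Minimal sketch of an assumed qlearn-style interface (not the original module).
    def __init__(self, actions, epsilon=0.1, alpha=0.1, gamma=0.9):
        self.q = {}                    # Q-table: (state, action) -> value
        self.actions = list(actions)
        self.epsilon = epsilon         # exploration rate
        self.alpha = alpha             # learning rate
        self.gamma = gamma             # discount factor

    def getQ(self, state, action):
        return self.q.get((state, action), 0.0)

    def chooseAction(self, state):
        # Epsilon-greedy: explore with probability epsilon, otherwise pick the best known action.
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        values = [self.getQ(state, a) for a in self.actions]
        return self.actions[values.index(max(values))]

    def learn(self, state1, action1, reward, state2):
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        best_next = max(self.getQ(state2, a) for a in self.actions)
        old = self.getQ(state1, action1)
        self.q[(state1, action1)] = old + self.alpha * (reward + self.gamma * best_next - old)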
Example #2
 def __init__(self):
     self.ai = qlearn.QLearn(
         actions=range(directions), epsilon=0.1, alpha=0.1, gamma=0.9)
     self.lastAction = None
     self.score = 0
     self.episode = 0                #Own Implementation
     self.QAverageQ = []             #Own Implementation
     self.actionsInEpisode = 0       #Own Implementation
     self.actionsInAllEpisodes = []  #Own Implementation
Example #3
 def __init__(self):
     self.ai = None
     self.ai = qlearn.QLearn(actions=list(range(directions)),
                             alpha=0.1,
                             gamma=0.9,
                             epsilon=0.1)
     self.eaten = 0
     self.fed = 0
     self.lastState = None
     self.lastAction = None
Example #4
 def __init__(self):
     self.actions = range(directions)
     self.egoAI = qlearn.QLearn(
         actions=self.actions, epsilon=0.05, alpha=0.1, gamma=.05)
     self.lastAction = None
     self.hitWall = False
     self.score = 0
     self.intentional_deaths = 0
     self.unintentional_deaths = 0
     self.intentional = True
Example #5
    def __init__(self, puzzleSize):
        # alpha ... learning rate between 0-1 (0 means never update Q-values)
        # gamma ... discount factor between 0-1 (higher means the algorithm looks farther into the future;
        #           at exactly 1 the discounted return can grow without bound, so don't use 1)
        # epsilon ... exploration factor between 0-1 (chance of taking a random action)

        # set values; epsilon will be periodically overwritten (see the pre-train section farther down) until it reaches 0
        # testing alpha = 1 instead of 0.1
        self.ai = learner.QLearn(puzzleSize=puzzleSize,
                                 epsilon=epsilonStartVal,
                                 alpha=alphaVal,
                                 gamma=gammaVal)
        self.lastState = None
        self.lastAction = None
        self.solved = 0
        self.age = 0
        # all tile swaps that have been done
        self.movesDone = 0
        # all actions that have been taken = all attempted swaps
        self.actionsTaken = 0
        self.puzzleSize = puzzleSize
        # 2d array containing values describing which numbers are in which positions [pos] = value
        # for size = 2, state[1][0] = c:
        # a b
        # c d
        self.randomizer = puzzleRandomizer.Randomizer(self.puzzleSize)
        # create random solvable puzzle start
        self.state = self.randomizer.makeRandomPuzzle(self.solved)
        # describes position of the empty cell (value = 0) (x,y)
        self.emptyCellPos = self.initEmptyCellPos()
        # up, down, left, right
        self.direction_list = [(-1, 0), (1, 0), (0, -1), (0, 1)]
        # create dict of cells in the puzzle that are neighbours to each other
        self.neighbours = self.initNeighbours()
        # create dict to get 2d-positions from 1d-position: (x,y)
        self.positionConverter = self.init1dTo2dPositionConverter()
        # create array equal to state, but with the expected solutions instead
        self.solution = self.initSolvedPosition()
        # self.display = display.makeDisplay(self)
        # init variables to calc averages
        self.solveCount = 0
        self.totalMoves = 0
        self.totalTime = 0
        self.steps = 0

        #self.currentManhattan = self.getManhattanDistance(self.state, self.solution)
        #self.lastManhattan = self.currentManhattan
        self.goalPositions = self.createGoalPositionsPerTile()

        # get manhattan distance for tile num at pos y,x via self.manhattanPerTile[num][(y,x)]
        self.manhattanPerTile = self.createManhattanPerTile()

        # get manhattan distance for a board state [[1,2,3],[4,5,6],[7,8,0]]
        # via self.manhattanPerBoard[(1,2,3,4,5,6,7,8,0)]
        self.manhattanPerBoard = self.createManhattanPerBoard()
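Example #5 precomputes Manhattan-distance lookup tables (createManhattanPerTile, createManhattanPerBoard) so the reward signal can be derived without recomputing distances at every step. The helper below is a hypothetical standalone sketch of how such a per-tile table could be built; the actual method and its goal layout are not shown on this page.

def create_manhattan_per_tile(puzzle_size, goal_positions):
    # Hypothetical sketch: for every tile number, precompute the Manhattan distance
    # from each board position (y, x) to that tile's goal position.
    # goal_positions maps tile number -> (goal_y, goal_x); tile 0 is the empty cell.
    table = {}
    for num in range(puzzle_size * puzzle_size):
        goal_y, goal_x = goal_positions[num]
        table[num] = {
            (y, x): abs(y - goal_y) + abs(x - goal_x)
            for y in range(puzzle_size)
            for x in range(puzzle_size)
        }
    return table

# For a 2x2 puzzle with goal layout [[1, 2], [3, 0]]:
# goal = {1: (0, 0), 2: (0, 1), 3: (1, 0), 0: (1, 1)}
# create_manhattan_per_tile(2, goal)[3][(0, 1)] == 2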
Example #6
 def __init__(self):
     self.ai = None
     self.ai = qlearn.QLearn(actions=range(4),
                             alpha=0.1,
                             gamma=0.9,
                             epsilon=0.1)
     self.guardWin = 0
     self.thiefWin = 0
     self.lastState = None
     self.lastAction = None
     self.color = cfg.thief_color
Example #7
    def __init__(self):
        self.ai = None
        self.ai = qlearn.QLearn(actions=range(cfg.directions),
                                alpha=0.1,
                                gamma=0.9,
                                epsilon=0.1)
        self.catWin = 0
        self.mouseWin = 0
        self.lastState = None
        self.lastAction = None
        self.color = cfg.mouse_color

        print('mouse init...')
Example #8
    def __init__(self, allo_weight=.5, ego_weight=.5, weight_learning=True):

        self.actions = range(directions)
        self.epsilon = .1
        self.eta = 5e-6
        self.lastAction = None
        self.lastaction_index = None
        self.hitWall = False
        self.score = 0
        self.intentional_deaths = 0
        self.unintentional_deaths = 0
        self.intentional = True

        self.allo_weight = allo_weight
        self.alloAI = qlearn.QLearn(actions=self.actions,
                                    epsilon=0.05,
                                    alpha=0.1,
                                    gamma=.9)
        self.ego_weight = ego_weight
        self.egoAI = qlearn.QLearn(actions=self.actions,
                                   epsilon=0.05,
                                   alpha=0.1,
                                   gamma=.0)
        self.weight_learning = weight_learning
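Example #8 keeps separate allocentric and egocentric Q-learners and blends their estimates with allo_weight and ego_weight, optionally adapting the weights with the small step size eta when weight_learning is enabled. One plausible way such a blend could work is sketched below; the getQ calls and the weight-update rule are assumptions, not the author's code.

def combined_q(allo_ai, ego_ai, allo_w, ego_w, allo_state, ego_state, action):
    # Weighted sum of the two learners' value estimates for the same candidate action.
    return allo_w * allo_ai.getQ(allo_state, action) + ego_w * ego_ai.getQ(ego_state, action)

def update_weights(allo_w, ego_w, allo_q, ego_q, td_error, eta=5e-6):
    # Semi-gradient-style adjustment: nudge each weight in the direction that reduces
    # the temporal-difference error of the combined estimate, then renormalise.
    allo_w += eta * td_error * allo_q
    ego_w += eta * td_error * ego_q
    total = allo_w + ego_w
    if total > 0:
        allo_w, ego_w = allo_w / total, ego_w / total
    return allo_w, ego_w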
Example #9
    def __init__(self, datas):
        # feature 0~7: flight number dummy variables
        # feature 8: departure date; feature 9: observed date state;
        # feature 10: current price
        self.datas = datas  # all rows share the same departure date
        self.actions = 2  # action=0 for buy; action=1 for wait.

        states = np.unique(self.datas[:,9])
        self.maxStates = max(states)  # states range from 0 to maxStates (maxStates+1 states in total)
        self.qlearning = qlearn.QLearn(self.actions, self.maxStates)

        # initialize the action = buy
        for i in range(self.datas.shape[0]):
            state = self.datas[i, 9]
            reward = -1 * self.datas[i, 10]
            self.qlearning.updateQForState(state, 0, reward)

        # initialize the action = wait
        for state in range(int(self.maxStates+1)):
            reward = -1 * self.getMinimumFuturePrice(state)
            self.qlearning.updateQForState(state, 1, reward)



        """
        # initialize the action = buy
        for state in range(self.maxStates+1):
            try:
                reward = -1 * self.getPrice(state)
                self.qlearning.updateQForState(state, 0, reward)
            except: # a little tricky here
                print "Exception: state {:d}, action buy".format(state)
                reward = -1 * self.getPrice(state-1)
                self.qlearning.updateQForState(state, 0, reward)

        # initialize the action = wait
        for state in range(self.maxStates+1):
            try:
                reward = -1 * self.getMinimumFuturePrice(state)
                self.qlearning.updateQForState(state, 1, reward)
            except:
                print self.getMinimumFuturePrice(66)
                print "Exception: state {:d}, action wait".format(state)
        """
        # for state = 0, the action = wait means nothing
        self.qlearning.updateQForState(0, 1, -1 * self.getPrice(0))
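Example #9 seeds the Q-table with negative ticket prices: buying at a state is initialised with the price observed there, while waiting is initialised with the cheapest price still reachable. The helpers getPrice and getMinimumFuturePrice are referenced but not shown; hypothetical sketches consistent with the column layout described in the comments (column 9 = observed date state, column 10 = price) might look like the following. The assumption that a smaller state index means a later observation date is mine, not the author's.

    def getPrice(self, state):
        # Hypothetical helper: price observed at the given state for this departure date.
        rows = self.datas[self.datas[:, 9] == state]
        return rows[0, 10]

    def getMinimumFuturePrice(self, state):
        # Hypothetical helper: cheapest price observed at any later date (smaller state index).
        future = self.datas[self.datas[:, 9] < state]
        return future[:, 10].min()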
Example #10
    def __init__(self):
        self.ai = None
        self.ai = qlearn.QLearn(actions=range(cfg.directions),
                                alpha=cfg.alpha,
                                gamma=cfg.gamma,
                                epsilon=cfg.epsilon)
        self.catWin = 0
        self.mouseWin = 0
        self.round = 1
        self.wincount = 0
        self.lastState = None
        self.lastAction = None
        self.logfilename = 'log-' + datetime.now().strftime(
            '%Y%m%d-%H%M%S') + '-' + str(os.getpid()) + '.txt'
        self.color = cfg.mouse_color

        self.load_state()

        print('mouse init...')
Example #11
    def __init__(self, actions, qfile="qtable.txt"):
        self.actions = actions

        qtable = {}
        # line[i] = (4239, 'right'):-1
        with open(qfile, "r") as f:
            lines = f.readlines()
        for line in lines:
            line = line.split(":")
            # ["(69210, 'right')", '-1\n']

            index = line[0].split(",")
            index[0] = int(re.sub("[^A-Za-z0-9]+", "", index[0]))
            index[1] = re.sub("[^A-Za-z0-9]+", "", index[1])
            value = line[1].strip("\n")
            value = float(value)
            qtable[(index[0], index[1])] = value

        self.ai = qlearn.QLearn(actions, q=qtable, c=0, alpha=0.7, gamma=0.5)
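Example #11 parses a plain-text Q-table whose lines have the form (state, 'action'):value. For completeness, a sketch of the complementary writer that would produce the same format, assuming the table is a dict keyed by (state, action) tuples as loaded above:

def save_qtable(qtable, qfile="qtable.txt"):
    # Write one "(state, 'action'):value" line per entry, matching the reader above.
    with open(qfile, "w") as f:
        for (state, action), value in qtable.items():
            f.write("({}, '{}'):{}\n".format(state, action, value))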
Example #12
    # Loads parameters from the ROS param server
    # Parameters are stored in a yaml file inside the config directory
    # They are loaded at runtime by the launch file
    Alpha = rospy.get_param("/turtlebot2/alpha")
    Epsilon = rospy.get_param("/turtlebot2/epsilon")
    Gamma = rospy.get_param("/turtlebot2/gamma")
    epsilon_discount = rospy.get_param("/turtlebot2/epsilon_discount")
    nepisodes = rospy.get_param("/turtlebot2/nepisodes")
    nsteps = rospy.get_param("/turtlebot2/nsteps")

    running_step = rospy.get_param("/turtlebot2/running_step")

    # Initialises the algorithm that we are going to use for learning
    qlearn = qlearn.QLearn(states=range(env.observation_space.n), 
                           actions=range(env.action_space.n),
                           alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
    initial_epsilon = qlearn.epsilon

    start_time = time.time()
    highest_reward = 0

    # Start the main training loop over the episodes
    for x in range(nepisodes):
        rospy.logdebug("############### WALL START EPISODE=>" + str(x))

        cumulated_reward = 0
        done = False
        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount
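Examples #12 and #14 stop right after the per-episode epsilon decay. In these gym/ROS training scripts the rest of the episode is usually a step loop of the shape sketched below; it is not copied from the original file, and the chooseAction/learn names are assumed from the same qlearn interface used elsewhere on this page.

# Hedged sketch of a typical inner step loop (sits inside the episode loop above):
observation = env.reset()
state = ''.join(map(str, observation))
for i in range(nsteps):
    action = qlearn.chooseAction(state)                 # epsilon-greedy action selection
    observation, reward, done, info = env.step(action)  # apply it in the environment
    cumulated_reward += reward
    nextState = ''.join(map(str, observation))
    qlearn.learn(state, action, reward, nextState)      # tabular Q-update
    state = nextState
    if done:
        break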
Example #13
 def __init__(self, actions, c=0.3, alpha=0.7, gamma=0.5, cdecay=0.999):
     self.actions = actions
     self.ai = qlearn.QLearn(actions, c=c, alpha=alpha, gamma=gamma)
Example #14
    rospy.loginfo("Monitor Wrapper started")

    last_time_steps = numpy.ndarray(0)

    # Loads parameters from the ROS param server
    # Parameters are stored in a yaml file inside the config directory
    # They are loaded at runtime by the launch file
    Alpha = rospy.get_param("/monoped/alpha")
    Epsilon = rospy.get_param("/monoped/epsilon")
    Gamma = rospy.get_param("/monoped/gamma")
    epsilon_discount = rospy.get_param("/monoped/epsilon_discount")
    nepisodes = rospy.get_param("/monoped/nepisodes")
    nsteps = rospy.get_param("/monoped/nsteps")

    # Initialises the algorithm that we are going to use for learning
    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
    initial_epsilon = qlearn.epsilon

    start_time = time.time()
    highest_reward = 0

    # Start the main training loop over the episodes
    for x in range(nepisodes):
        rospy.logdebug("############### START EPISODE=>" + str(x))

        cumulated_reward = 0
        done = False
        if qlearn.epsilon > 0.05:
            qlearn.epsilon *= epsilon_discount

        # Initialize the environment and get first state of the robot
Example #15
observation_n = env.reset()
"""
print observation_n

for i in range(10):
    observation = env.reset()
    for j in range(10):
        env.render()
        action = np.random.randint(4)
        observation, reward, done, info = env.step(action)
        print observation
"""

#Init qlearning
qlearn = qlearn.QLearn(actions=range(3), alpha=0.2, gamma=0.8, epsilon=0.9)
initial_epsilon = qlearn.epsilon
epsilon_discount = 0.9986

save_ep = 1

episode = 0
start_time = time.time()
while (True):
    episode += 1
    observation = env.reset()
    total_reward = 0
    if qlearn.epsilon > 0.05:
        qlearn.epsilon *= epsilon_discount
    state = ''
    for i in observation:
Example #16
    def plot_scoreMap(self):
        vals = [prisonersDelima.p_map[x] for x in self.history]
        lst = [*zip(*vals)]
        p1 = Cumulative(lst[0])
        aiR = Cumulative(lst[1])
        plt.plot(aiR, 'r', label="Q_ai")
        plt.plot(p1, 'b', label="pl_1")
        plt.xlabel("iterations")
        plt.ylabel("score along iterations")
        plt.legend()
        plt.show()


if __name__ == "__main__":
    pd = prisonersDelima(5)
    ai = qlearn.QLearn(epsilon=qEpsilon, lambd=qLambda, alpha=qAlpha)
    ai.setActions(['c', 'd'])

    def take_action(history):
        #print("in Q")
        if len(history) > 0:
            pnts = prisonersDelima.evaluate_points(history)
            reward = pnts[1] - pnts[0]
            state = history[-1]
            ai.learn(state, reward)
            choice = ai.do(state)
            return (choice)
        else:
            return ('c')

    pd.set_plr_func(take_action)
Example #17
    state = observation.copy()
    linear_x = np.amin([state[i] for i in largest_gap]) * 0.2
    angular_z = mid_largest_gap
    action = int(float(mid_largest_gap) / num_ranges * num_actions)
    return action


if __name__ == '__main__':

    env = gym.make('GazeboCircuit2TurtlebotLidar-v0')

    last_time_steps = numpy.ndarray(0)

    qlearn = qlearn.QLearn(actions=np.arange(7),
                           alpha=0.2,
                           gamma=0.8,
                           epsilon=0.9)

    initial_epsilon = qlearn.epsilon

    epsilon_discount = 0.9986

    start_time = time.time()
    total_episodes = 10000
    highest_reward = 0
    teach_episodes = 2

    print("Teaching...")
    for x in range(teach_episodes):
        done = False
        observation = env.reset()
Example #18
"""
print observation_n

for i in range(10):
    observation = env.reset()
    for j in range(10):
        env.render()
        action = np.random.randint(4)
        observation, reward, done, info = env.step(action)
        print observation
"""

#Init qlearning
qlearn = qlearn.QLearn(actions=range(8),
                       alpha=0.2,
                       gamma=0.8,
                       epsilon=1.0,
                       _file='q-table-fw1.txt')
initial_epsilon = qlearn.epsilon
epsilon_discount = 0.9986

save_ep = 1

episode = 0
start_time = time.time()

#reward and step
rList = []
sList = []
with open('step_list_fw1.txt', 'r') as f:
    for line in f:
Example #19
        env.render()
    elif ((x-render_episodes)%render_interval == 0) and (x != 0) and (x > render_skip) and (render_episodes < x):
        env.render(close=True)

if __name__ == '__main__':

    env = gym.make('GazeboCircuitTurtlebotLidar-v0')
    print "Gym Makde done"
    outdir = '/home/user/catkin_ws/src/gym_construct/src/gazebo_gym_experiments'
    # env.monitor.start(outdir, force=True, seed=None)       # I had to comment this and
    env = wrappers.Monitor(env, outdir, force=True)          # use this to avoid warnings
    #plotter = LivePlot(outdir)
    print "Monitor Wrapper started"
    last_time_steps = numpy.ndarray(0)

    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                    alpha=0.1, gamma=0.8, epsilon=0.9)

    initial_epsilon = qlearn.epsilon

    epsilon_discount = 0.999 # 1098 eps to reach 0.1

    start_time = time.time()
    total_episodes = 10
    highest_reward = 0

    for x in range(total_episodes):
        done = False

        cumulated_reward = 0  # Should going forward give more reward than L/R?
        print ("Episode = "+str(x))
        observation = env.reset()
Example #20
import numpy as np
import matplotlib.pyplot as plt
import math
import time
import environment
import qlearn
import pickle

PI = math.pi

if __name__ == '__main__':
    # Establish Communication

    last_time_steps = np.ndarray(0)
    environment = environment.Environment()
    qlearn = qlearn.QLearn(actions=range(len(environment.action_space)),
                           alpha=0.2,
                           gamma=0.8,
                           epsilon=0.9)

    initial_epsilon = qlearn.epsilon

    epsilon_discount = 0.9986

    start_time = time.time()
    total_episodes = 10000
    highest_reward = 0

    f = open('q_table.txt', 'a')
    f2 = open('q_table_list.pickle', 'wb')
    for x in range(total_episodes):
        done = False
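Example #20 opens both a text log (q_table.txt) and a pickle file before the episode loop begins. A hedged sketch of how the learned table could be checkpointed to those two files at the end of training, assuming the learner keeps its table in a dict attribute named q:

import pickle

def checkpoint_q_table(learner, txt_file, pickle_file):
    # Human-readable dump appended to the text log...
    txt_file.write(str(learner.q) + '\n')
    # ...and a binary snapshot that can later be restored with pickle.load().
    pickle.dump(learner.q, pickle_file)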