def __init__(self):
    self.ai = qlearn.QLearn(actions=range(directions), epsilon=0.1, alpha=0.1, gamma=0.9)
    self.lastAction = None
    self.score = 0
def __init__(self):
    self.ai = qlearn.QLearn(
        actions=range(directions), epsilon=0.1, alpha=0.1, gamma=0.9)
    self.lastAction = None
    self.score = 0
    self.episode = 0                # Own Implementation
    self.QAverageQ = []             # Own Implementation
    self.actionsInEpisode = 0       # Own Implementation
    self.actionsInAllEpisodes = []  # Own Implementation
def __init__(self):
    self.ai = qlearn.QLearn(actions=list(range(directions)),
                            alpha=0.1, gamma=0.9, epsilon=0.1)
    self.eaten = 0
    self.fed = 0
    self.lastState = None
    self.lastAction = None
def __init__(self):
    self.actions = range(directions)
    self.egoAI = qlearn.QLearn(
        actions=self.actions, epsilon=0.05, alpha=0.1, gamma=0.05)
    self.lastAction = None
    self.hitWall = False
    self.score = 0
    self.intentional_deaths = 0
    self.unintentional_deaths = 0
    self.intentional = True
def __init__(self, puzzleSize):
    # alpha ...   learning rate between 0 and 1 (0 means Q-values are never updated)
    # gamma ...   discount factor between 0 and 1 (higher means the algorithm looks farther into
    #             the future; at exactly 1 infinite rewards become possible, so don't go to 1)
    # epsilon ... exploration factor between 0 and 1 (chance of taking a random action)
    # Set the values; epsilon is periodically overwritten (see the pre-train section farther down)
    # until it reaches 0. Testing alpha = 1 instead of 0.1.
    self.ai = learner.QLearn(puzzleSize=puzzleSize, epsilon=epsilonStartVal,
                             alpha=alphaVal, gamma=gammaVal)
    self.lastState = None
    self.lastAction = None
    self.solved = 0
    self.age = 0
    # all tile swaps that have been done
    self.movesDone = 0
    # all actions that have been taken = all attempted swaps
    self.actionsTaken = 0
    self.puzzleSize = puzzleSize
    # 2d array describing which numbers are in which positions: [pos] = value
    # for size = 2, state[1][0] = c:
    #   a b
    #   c d
    self.randomizer = puzzleRandomizer.Randomizer(self.puzzleSize)
    # create a random, solvable starting puzzle
    self.state = self.randomizer.makeRandomPuzzle(self.solved)
    # position of the empty cell (value = 0) as (x, y)
    self.emptyCellPos = self.initEmptyCellPos()
    # up, down, left, right
    self.direction_list = [(-1, 0), (1, 0), (0, -1), (0, 1)]
    # dict of cells in the puzzle that are neighbours of each other
    self.neighbours = self.initNeighbours()
    # dict to map a 1d position to a 2d position (x, y)
    self.positionConverter = self.init1dTo2dPositionConverter()
    # array shaped like state, but holding the expected solution instead
    self.solution = self.initSolvedPosition()
    # self.display = display.makeDisplay(self)
    # variables used to calculate averages
    self.solveCount = 0
    self.totalMoves = 0
    self.totalTime = 0
    self.steps = 0
    # self.currentManhattan = self.getManhattanDistance(self.state, self.solution)
    # self.lastManhattan = self.currentManhattan
    self.goalPositions = self.createGoalPositionsPerTile()
    # Manhattan distance for tile num at pos (y, x): self.manhattanPerTile[num][(y, x)]
    self.manhattanPerTile = self.createManhattanPerTile()
    # Manhattan distance for a board state [[1, 2, 3], [4, 5, 6], [7, 8, 0]]:
    # self.manhattanPerBoard[(1, 2, 3, 4, 5, 6, 7, 8, 0)]
    self.manhattanPerBoard = self.createManhattanPerBoard()
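The constructor above precomputes Manhattan-distance lookup tables (manhattanPerTile, manhattanPerBoard) rather than recomputing the metric each step. For reference, a minimal sketch of the underlying metric for a square sliding puzzle; the helper name is not from the original code, and it assumes state[y][x] holds tile numbers with 0 for the empty cell and a row-major solved layout.

def manhattan_distance(state, size):
    # Sketch only: total Manhattan distance of a board to the solved layout,
    # where the solved board counts 1..size*size-1 row by row and 0 sits last.
    total = 0
    for y in range(size):
        for x in range(size):
            value = state[y][x]
            if value == 0:
                continue  # the empty cell does not contribute
            goal_y, goal_x = divmod(value - 1, size)
            total += abs(goal_y - y) + abs(goal_x - x)
    return total

For example, manhattan_distance([[1, 2, 3], [4, 5, 6], [7, 8, 0]], 3) returns 0 for the solved board. Caching these values per tile and per board, as the constructor does, trades memory for lookup speed.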
def __init__(self):
    self.ai = qlearn.QLearn(actions=range(4), alpha=0.1, gamma=0.9, epsilon=0.1)
    self.guardWin = 0
    self.thiefWin = 0
    self.lastState = None
    self.lastAction = None
    self.color = cfg.thief_color
def __init__(self):
    self.ai = qlearn.QLearn(actions=range(cfg.directions),
                            alpha=0.1, gamma=0.9, epsilon=0.1)
    self.catWin = 0
    self.mouseWin = 0
    self.lastState = None
    self.lastAction = None
    self.color = cfg.mouse_color
    print('mouse init...')
def __init__(self, allo_weight=0.5, ego_weight=0.5, weight_learning=True):
    self.actions = range(directions)
    self.epsilon = 0.1
    self.eta = 5e-6
    self.lastAction = None
    self.lastaction_index = None
    self.hitWall = False
    self.score = 0
    self.intentional_deaths = 0
    self.unintentional_deaths = 0
    self.intentional = True
    self.allo_weight = allo_weight
    self.alloAI = qlearn.QLearn(actions=self.actions, epsilon=0.05, alpha=0.1, gamma=0.9)
    self.ego_weight = ego_weight
    self.egoAI = qlearn.QLearn(actions=self.actions, epsilon=0.05, alpha=0.1, gamma=0.0)
    self.weight_learning = weight_learning
def __init__(self, datas):
    # feature 0~7:  flight number dummy variables
    # feature 8:    departure date
    # feature 9:    observed date state
    # feature 10:   current price
    self.datas = datas  # all rows share the same departure date
    self.actions = 2    # action = 0 for buy; action = 1 for wait
    states = np.unique(self.datas[:, 9])
    self.maxStates = max(states)  # states range from 0 to maxStates (maxStates + 1 in total)
    self.qlearning = qlearn.QLearn(self.actions, self.maxStates)

    # initialize the action = buy
    for i in range(self.datas.shape[0]):
        state = self.datas[i, 9]
        reward = -1 * self.datas[i, 10]
        self.qlearning.updateQForState(state, 0, reward)

    # initialize the action = wait
    for state in range(int(self.maxStates + 1)):
        reward = -1 * self.getMinimumFuturePrice(state)
        self.qlearning.updateQForState(state, 1, reward)

    """
    # initialize the action = buy
    for state in range(self.maxStates + 1):
        try:
            reward = -1 * self.getPrice(state)
            self.qlearning.updateQForState(state, 0, reward)
        except:
            # a little tricky here
            print("Exception: state {:d}, action buy".format(state))
            reward = -1 * self.getPrice(state - 1)
            self.qlearning.updateQForState(state, 0, reward)

    # initialize the action = wait
    for state in range(self.maxStates + 1):
        try:
            reward = -1 * self.getMinimumFuturePrice(state)
            self.qlearning.updateQForState(state, 1, reward)
        except:
            print(self.getMinimumFuturePrice(66))
            print("Exception: state {:d}, action wait".format(state))
    """

    # for state = 0, the action = wait means nothing
    self.qlearning.updateQForState(0, 1, -1 * self.getPrice(0))
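getMinimumFuturePrice() is called above but not shown. The following is only a hypothetical sketch of what it might compute, assuming smaller values of feature 9 (the observed-date state) correspond to observations closer to departure, i.e. states still reachable by waiting.

def getMinimumFuturePrice(self, state):
    # Hypothetical sketch, not the original method: cheapest ticket price among
    # rows whose observed-date state lies in the "future" of the given state.
    future_rows = self.datas[self.datas[:, 9] < state]
    # At state 0 there are no future rows, which is presumably why the code
    # above handles (state = 0, action = wait) as a special case.
    return np.min(future_rows[:, 10])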
def __init__(self):
    self.ai = qlearn.QLearn(actions=range(cfg.directions), alpha=cfg.alpha,
                            gamma=cfg.gamma, epsilon=cfg.epsilon)
    self.catWin = 0
    self.mouseWin = 0
    self.round = 1
    self.wincount = 0
    self.lastState = None
    self.lastAction = None
    self.logfilename = ('log-' + datetime.now().strftime('%Y%m%d-%H%M%S') +
                        '-' + str(os.getpid()) + '.txt')
    self.color = cfg.mouse_color
    self.load_state()
    print('mouse init...')
def __init__(self, actions, qfile="qtable.txt"):
    self.actions = actions
    qtable = {}
    # each line looks like: (4239, 'right'):-1
    with open(qfile, "r") as f:
        lines = f.readlines()
    for line in lines:
        line = line.split(":")      # ["(69210, 'right')", '-1\n']
        index = line[0].split(",")
        index[0] = int(re.sub("[^A-Za-z0-9]+", "", index[0]))
        index[1] = re.sub("[^A-Za-z0-9]+", "", index[1])
        value = line[1].strip("\n")
        value = float(value)
        qtable[(index[0], index[1])] = value
    self.ai = qlearn.QLearn(actions, q=qtable, c=0, alpha=0.7, gamma=0.5)
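For the reverse direction, a minimal sketch of writing a Q-table back out in the same "(state, 'action'):value" text format that the loader above parses; save_qtable is a hypothetical helper name, not part of the original class.

def save_qtable(qtable, qfile="qtable.txt"):
    # write one "(state, 'action'):value" entry per line, matching the parser above
    with open(qfile, "w") as f:
        for (state, action), value in qtable.items():
            f.write("({}, '{}'):{}\n".format(state, action, value))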
# Loads parameters from the ROS param server.
# Parameters are stored in a yaml file inside the config directory
# and are loaded at runtime by the launch file.
Alpha = rospy.get_param("/turtlebot2/alpha")
Epsilon = rospy.get_param("/turtlebot2/epsilon")
Gamma = rospy.get_param("/turtlebot2/gamma")
epsilon_discount = rospy.get_param("/turtlebot2/epsilon_discount")
nepisodes = rospy.get_param("/turtlebot2/nepisodes")
nsteps = rospy.get_param("/turtlebot2/nsteps")
running_step = rospy.get_param("/turtlebot2/running_step")

# Initialises the algorithm that we are going to use for learning
qlearn = qlearn.QLearn(states=range(env.observation_space.n),
                       actions=range(env.action_space.n),
                       alpha=Alpha, gamma=Gamma, epsilon=Epsilon)
initial_epsilon = qlearn.epsilon

start_time = time.time()
highest_reward = 0

# Starts the main training loop over the episodes
for x in range(nepisodes):
    rospy.logdebug("############### WALL START EPISODE=>" + str(x))

    cumulated_reward = 0
    done = False
    if qlearn.epsilon > 0.05:
        qlearn.epsilon *= epsilon_discount
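The comments above say the parameters live in a yaml file that the launch file pushes onto the param server; a hypothetical example of what such a config could look like (file name and values are illustrative, not taken from the original project):

# config/turtlebot2_qlearn_params.yaml (hypothetical name and values)
turtlebot2:
  alpha: 0.1
  gamma: 0.8
  epsilon: 0.9
  epsilon_discount: 0.999
  nepisodes: 500
  nsteps: 1000
  running_step: 0.06   # seconds of simulated time per step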
def __init__(self, actions, c=0.3, alpha=0.7, gamma=0.5, cdecay=0.999):
    # note: cdecay is accepted here but not stored or passed on in this constructor
    self.actions = actions
    self.ai = qlearn.QLearn(actions, c=c, alpha=alpha, gamma=gamma)
rospy.loginfo("Monitor Wrapper started") last_time_steps = numpy.ndarray(0) # Loads parameters from the ROS param server # Parameters are stored in a yaml file inside the config directory # They are loaded at runtime by the launch file Alpha = rospy.get_param("/monoped/alpha") Epsilon = rospy.get_param("/monoped/epsilon") Gamma = rospy.get_param("/monoped/gamma") epsilon_discount = rospy.get_param("/monoped/epsilon_discount") nepisodes = rospy.get_param("/monoped/nepisodes") nsteps = rospy.get_param("/monoped/nsteps") # Initialises the algorithm that we are going to use for learning qlearn = qlearn.QLearn(actions=range(env.action_space.n), alpha=Alpha, gamma=Gamma, epsilon=Epsilon) initial_epsilon = qlearn.epsilon start_time = time.time() highest_reward = 0 # Starts the main training loop: the one about the episodes to do for x in range(nepisodes): rospy.logdebug("############### START EPISODE=>" + str(x)) cumulated_reward = 0 done = False if qlearn.epsilon > 0.05: qlearn.epsilon *= epsilon_discount # Initialize the environment and get first state of the robot
observation_n = env.reset()

"""
print(observation_n)
for i in range(10):
    observation = env.reset()
    for j in range(10):
        env.render()
        action = np.random.randint(4)
        observation, reward, done, info = env.step(action)
        print(observation)
"""

# Init Q-learning
qlearn = qlearn.QLearn(actions=range(3), alpha=0.2, gamma=0.8, epsilon=0.9)
initial_epsilon = qlearn.epsilon
epsilon_discount = 0.9986
save_ep = 1
episode = 0
start_time = time.time()

while True:
    episode += 1
    observation = env.reset()
    total_reward = 0

    if qlearn.epsilon > 0.05:
        qlearn.epsilon *= epsilon_discount

    state = ''
    for i in observation:
def plot_scoreMap(self):
    vals = [prisonersDelima.p_map[x] for x in self.history]
    lst = [*zip(*vals)]
    p1 = Cumulative(lst[0])
    aiR = Cumulative(lst[1])
    plt.plot(aiR, 'r', label="Q_ai")
    plt.plot(p1, 'b', label="pl_1")
    plt.xlabel("iterations")
    plt.ylabel("cumulative score over iterations")
    plt.legend()
    plt.show()


if __name__ == "__main__":
    pd = prisonersDelima(5)
    ai = qlearn.QLearn(epsilon=qEpsilon, lambd=qLambda, alpha=qAlpha)
    ai.setActions(['c', 'd'])

    def take_action(history):
        # print("in Q")
        if len(history) > 0:
            pnts = prisonersDelima.evaluate_points(history)
            reward = pnts[1] - pnts[0]
            state = history[-1]
            ai.learn(state, reward)
            choice = ai.do(state)
            return choice
        else:
            return 'c'

    pd.set_plr_func(take_action)
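Cumulative() is called in plot_scoreMap but not defined in this snippet; a minimal running-sum sketch that matches how it is used here (an assumption, not the original helper):

def Cumulative(scores):
    # running total of per-round scores, so the plot shows accumulated points
    total = 0
    out = []
    for s in scores:
        total += s
        out.append(total)
    return out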
    state = observation.copy()
    # linear_x / angular_z are computed here but only the discretised action
    # index is returned in this snippet
    linear_x = np.amin([state[i] for i in largest_gap]) * 0.2
    angular_z = mid_largest_gap
    action = int(float(mid_largest_gap) / num_ranges * num_actions)
    return action


if __name__ == '__main__':
    env = gym.make('GazeboCircuit2TurtlebotLidar-v0')
    last_time_steps = numpy.ndarray(0)

    qlearn = qlearn.QLearn(actions=np.arange(7), alpha=0.2, gamma=0.8, epsilon=0.9)
    initial_epsilon = qlearn.epsilon
    epsilon_discount = 0.9986

    start_time = time.time()
    total_episodes = 10000
    highest_reward = 0
    teach_episodes = 2

    print("Teaching...")
    for x in range(teach_episodes):
        done = False
        observation = env.reset()
""" print observation_n for i in range(10): observation = env.reset() for j in range(10): env.render() action = np.random.randint(4) observation, reward, done, info = env.step(action) print observation """ #Init qlearning qlearn = qlearn.QLearn(actions=range(8), alpha=0.2, gamma=0.8, epsilon=1.0, _file='q-table-fw1.txt') initial_epsilon = qlearn.epsilon epsilon_discount = 0.9986 save_ep = 1 episode = 0 start_time = time.time() #reward and step rList = [] sList = [] with open('step_list_fw1.txt', 'r') as f: for line in f:
        env.render()
    elif ((x - render_episodes) % render_interval == 0) and (x != 0) and \
            (x > render_skip) and (render_episodes < x):
        env.render(close=True)


if __name__ == '__main__':
    env = gym.make('GazeboCircuitTurtlebotLidar-v0')
    print("Gym make done")

    outdir = '/home/user/catkin_ws/src/gym_construct/src/gazebo_gym_experiments'
    # env.monitor.start(outdir, force=True, seed=None)  # I had to comment this and
    env = wrappers.Monitor(env, outdir, force=True)     # use this to avoid warnings
    # plotter = LivePlot(outdir)
    print("Monitor Wrapper started")

    last_time_steps = numpy.ndarray(0)

    qlearn = qlearn.QLearn(actions=range(env.action_space.n),
                           alpha=0.1, gamma=0.8, epsilon=0.9)
    initial_epsilon = qlearn.epsilon
    epsilon_discount = 0.999  # 1098 eps to reach 0.1

    start_time = time.time()
    total_episodes = 10
    highest_reward = 0

    for x in range(total_episodes):
        done = False
        cumulated_reward = 0  # Should going forward give more reward than L/R?
        print("Episode = " + str(x))
        observation = env.reset()
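The snippet above cuts off right after env.reset(). A minimal sketch of how the inner step loop of such an episode typically continues, assuming this qlearn module exposes chooseAction(state) and learn(state1, action, reward, state2); both method names, the step budget, and the string encoding of states are assumptions here, not taken from the original file.

    # Hypothetical continuation inside the episode loop above.
    state = ''.join(map(str, observation))
    for i in range(1500):  # illustrative per-episode step budget
        # pick an epsilon-greedy action and apply it in the simulation
        action = qlearn.chooseAction(state)
        observation, reward, done, info = env.step(action)
        cumulated_reward += reward
        highest_reward = max(highest_reward, cumulated_reward)
        nextState = ''.join(map(str, observation))
        # update the Q-table from the observed transition
        qlearn.learn(state, action, reward, nextState)
        state = nextState
        if done:
            last_time_steps = numpy.append(last_time_steps, [int(i + 1)])
            break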
import math
import pickle
import time

import matplotlib.pyplot as plt
import numpy as np

import environment
import qlearn

PI = math.pi

if __name__ == '__main__':
    # Establish communication
    last_time_steps = np.ndarray(0)
    environment = environment.Environment()

    qlearn = qlearn.QLearn(actions=range(len(environment.action_space)),
                           alpha=0.2, gamma=0.8, epsilon=0.9)
    initial_epsilon = qlearn.epsilon
    epsilon_discount = 0.9986

    start_time = time.time()
    total_episodes = 10000
    highest_reward = 0

    f = open('q_table.txt', 'a')
    f2 = open('q_table_list.pickle', 'wb')

    for x in range(total_episodes):
        done = False