class Simulator(object): def __init__( self, percentObsDensity=20, endTime=40, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, autoInitialize=True, verbose=True, ): self.verbose = verbose self.startSimTime = time.time() self.collisionThreshold = 0.4 self.randomSeed = 5 self.Sensor_rayLength = 8 self.percentObsDensity = percentObsDensity self.defaultControllerTime = 1000 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() if autoInitialize: self.initialize() self.XVelocity_drawing = 0.0 self.YVelocity_drawing = 0.0 def initializeOptions(self): self.options = dict() self.options["World"] = dict() self.options["World"]["obstaclesInnerFraction"] = 0.98 self.options["World"]["randomSeed"] = 40 self.options["World"]["percentObsDensity"] = 0.0 self.options["World"]["nonRandomWorld"] = True self.options["World"]["circleRadius"] = 1.0 self.options["World"]["scale"] = 1 self.options["Sensor"] = dict() self.options["Sensor"]["rayLength"] = 10 self.options["Sensor"]["numRays"] = 50 self.options["Car"] = dict() self.options["Car"]["velocity"] = 4.0 self.options["dt"] = 0.05 self.options["runTime"] = dict() self.options["runTime"]["defaultControllerTime"] = 100 def setDefaultOptions(self): defaultOptions = dict() defaultOptions["World"] = dict() defaultOptions["World"]["obstaclesInnerFraction"] = 0.98 defaultOptions["World"]["randomSeed"] = 40 defaultOptions["World"]["percentObsDensity"] = 30 defaultOptions["World"]["nonRandomWorld"] = True defaultOptions["World"]["circleRadius"] = 1.75 defaultOptions["World"]["scale"] = 2.5 defaultOptions["Sensor"] = dict() defaultOptions["Sensor"]["rayLength"] = 10 defaultOptions["Sensor"]["numRays"] = 51 defaultOptions["Car"] = dict() defaultOptions["Car"]["velocity"] = 20 defaultOptions["dt"] = 0.05 defaultOptions["runTime"] = dict() defaultOptions["runTime"]["defaultControllerTime"] = 100 for k in defaultOptions: self.options.setdefault(k, defaultOptions[k]) for k in defaultOptions: if not isinstance(defaultOptions[k], dict): continue for j in defaultOptions[k]: self.options[k].setdefault(j, defaultOptions[k][j]) def initializeColorMap(self): self.colorMap = dict() self.colorMap["default"] = [0, 1, 0] def initialize(self): self.dt = self.options["dt"] self.controllerTypeOrder = ["default"] self.setDefaultOptions() self.Sensor = SensorObj( rayLength=self.options["Sensor"]["rayLength"], numRays=self.options["Sensor"]["numRays"] ) self.SensorApproximator = SensorApproximatorObj( numRays=self.options["Sensor"]["numRays"], circleRadius=self.options["World"]["circleRadius"] ) self.Controller = ControllerObj(self.Sensor, self.SensorApproximator) self.Car = CarPlant(controller=self.Controller, velocity=self.options["Car"]["velocity"]) self.Controller.initializeVelocity(self.Car.v) self.ActionSet = ActionSetObj() # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName("world")) self.world = World.buildLineSegmentTestWorld( percentObsDensity=self.options["World"]["percentObsDensity"], circleRadius=self.options["World"]["circleRadius"], nonRandom=self.options["World"]["nonRandomWorld"], scale=self.options["World"]["scale"], randomSeed=self.options["World"]["randomSeed"], obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"], ) om.removeFromObjectModel(om.findObjectByName("robot")) self.robot, self.frame = World.buildRobot() self.frame = self.robot.getChildFrame() self.frame.setProperty("Scale", 3) # self.frame.setProperty('Visible', False) # self.frame.setProperty('Edit', True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.defaultControllerTime = self.options["runTime"]["defaultControllerTime"] self.Car.setFrame(self.frame) print "Finished initialization" def runSingleSimulation(self, controllerType="default", simulationCutoff=None): # self.setRandomCollisionFreeInitialState() self.setInitialStateAtZero() currentCarState = np.copy(self.Car.state) nextCarState = np.copy(self.Car.state) self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2]) firstRaycast = self.Sensor.raycastAll(self.frame) firstRaycastLocations = self.Sensor.raycastAllLocations(self.frame) # self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations) # self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData) # self.Sensor.setLocator(self.LineSegmentLocator) nextRaycast = np.zeros(self.Sensor.numRays) # record the reward data runData = dict() startIdx = self.counter thisRunIndex = 0 NMaxSteps = 100 while self.counter < self.numTimesteps - 1: thisRunIndex += 1 idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx, :] = currentCarState x = self.stateOverTime[idx, 0] y = self.stateOverTime[idx, 1] self.setRobotFrameState(x, y, 0.0) # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) self.raycastData[idx, :] = currentRaycast S_current = (currentCarState, currentRaycast) if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") if controllerType in ["default", "defaultRandom"]: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False ) self.controlInputData[idx] = controlInput nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt) print "NEXTCARSTATE is ", nextCarState x = nextCarState[0] y = nextCarState[1] self.setRobotFrameState(x, y, 0.0) nextRaycast = self.Sensor.raycastAll(self.frame) # Compute the next control input S_next = (nextCarState, nextRaycast) if controllerType in ["default", "defaultRandom"]: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=firstRaycast, randomize=False ) # bookkeeping currentCarState = nextCarState currentRaycast = nextRaycast self.counter += 1 # break if we are in collision if self.checkInCollision(nextRaycast): if self.verbose: print "Had a collision, terminating simulation" break if thisRunIndex > NMaxSteps: print "was safe during N steps" break if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter, :] = currentCarState self.raycastData[self.counter, :] = currentRaycast # this just makes sure we don't get stuck in an infinite loop. if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): self.controllerTypeOrder = ["default"] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 self.idxDict["default"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1: self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(controllerType="default", simulationCutoff=simCutoff) runData["startIdx"] = startIdx runData["controllerType"] = "default" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0 : self.counter + 1, :] self.raycastData = self.raycastData[0 : self.counter + 1, :] self.controlInputData = self.controlInputData[0 : self.counter + 1] self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * ( self.numTicks - self.nextTickIdx ), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = 0.0 y = -5.0 theta = 0 # + np.random.uniform(0,2*np.pi,1)[0] * 0.01 self.Car.setCarState(x, y, 0.0, 0.0) self.setRobotFrameState(x, y, theta) print "In loop" if not self.checkInCollision(): break return x, y, theta def setInitialStateAtZero(self): x = 0.0 y = 0.0 theta = 0.0 self.Car.setCarState(x, y, 0.0, 0.0) self.setRobotFrameState(x, y, theta) return x, y, theta def setRandomCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0] y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0] theta = np.random.uniform(0, 2 * np.pi, 1)[0] self.Car.setCarState(x, y, theta) self.setRobotFrameState(x, y, theta) if not self.checkInCollision(): break return x, y, theta def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0 / self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) self.max_velocity = 20.0 sliderXVelocity = QtGui.QSlider(QtCore.Qt.Horizontal) sliderXVelocity.connect("valueChanged(int)", self.onXVelocityChanged) sliderXVelocity.setMaximum(self.max_velocity) sliderXVelocity.setMinimum(-self.max_velocity) l.addWidget(sliderXVelocity) sliderYVelocity = QtGui.QSlider(QtCore.Qt.Horizontal) sliderYVelocity.connect("valueChanged(int)", self.onYVelocityChanged) sliderYVelocity.setMaximum(self.max_velocity) sliderYVelocity.setMinimum(-self.max_velocity) l.addWidget(sliderYVelocity) firstRaycast = np.ones((self.Sensor.numRays, 1)) * 10.0 + np.random.randn(self.Sensor.numRays, 1) * 1.0 self.drawFirstIntersections(self.frame, firstRaycast) randomGlobalGoalButton = QtGui.QPushButton("Initialize Random Global Goal") randomGlobalGoalButton.connect("clicked()", self.onRandomGlobalGoalButton) l.addWidget(randomGlobalGoalButton) randomObstaclesButton = QtGui.QPushButton("Initialize Random Obstacles") randomObstaclesButton.connect("clicked()", self.onRandomObstaclesButton) l.addWidget(randomObstaclesButton) buildWorldFromRandomObstaclesButton = QtGui.QPushButton("Generate Polygon World") buildWorldFromRandomObstaclesButton.connect("clicked()", self.onBuildWorldFromRandomObstacles) l.addWidget(buildWorldFromRandomObstaclesButton) findLocalGoalButton = QtGui.QPushButton("Find Local Goal (Heuristic)") findLocalGoalButton.connect("clicked()", self.onFindLocalGoalButton) l.addWidget(findLocalGoalButton) drawActionSetButton = QtGui.QPushButton("Draw Action Set") drawActionSetButton.connect("clicked()", self.onDrawActionSetButton) l.addWidget(drawActionSetButton) runSimButton = QtGui.QPushButton("Simulate") runSimButton.connect("clicked()", self.onRunSimButton) l.addWidget(runSimButton) playButton = QtGui.QPushButton("Play/Pause") playButton.connect("clicked()", self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect("valueChanged(int)", self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider # slider4 = QtGui.QSlider(QtCore.Qt.Horizontal) # slider4.setMaximum(self.sliderMax) # l.addWidget(slider4) # slider5 = QtGui.QSlider(QtCore.Qt.Horizontal) # slider5.setMaximum(self.sliderMax) # l.addWidget(slider5) # slider5 = QtGui.QSlider(QtCore.Qt.Horizontal) # slider5.setMaximum(self.sliderMax) # l.addWidget(slider5) # slider6 = QtGui.QSlider(QtCore.Qt.Horizontal) # slider6.setMaximum(self.sliderMax) # l.addWidget(slider6) # slider7 = QtGui.QSlider(QtCore.Qt.Horizontal) # slider7.setMaximum(self.sliderMax) # l.addWidget(slider7) l.addWidget(playButton) l.addWidget(slider) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) self.view.orientationMarkerWidget().Off() l.addWidget(panel) w.showMaximized() self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2, 0, -1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() cameracontrolpanel.CameraControlPanel(self.view).widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter / elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.onRandomObstaclesButton() self.app.start() def drawFirstIntersections(self, frame, firstRaycast): origin = np.array(frame.transform.GetPosition()) d = DebugData() firstRaycastLocations = self.Sensor.invertRaycastsToLocations(self.frame, firstRaycast) for i in xrange(self.Sensor.numRays): endpoint = firstRaycastLocations[i, :] if firstRaycast[i] >= self.Sensor.rayLength - 0.1: d.addLine(origin, endpoint, color=[0, 1, 0]) else: d.addLine(origin, endpoint, color=[1, 0, 0]) vis.updatePolyData(d.getPolyData(), "rays", colorByName="RGB255") self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations) self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData) self.locator = self.LineSegmentLocator self.Sensor.setLocator(self.LineSegmentLocator) def updateDrawIntersection(self, frame, locator="None"): if locator == "None": locator = self.locator origin = np.array(frame.transform.GetPosition()) # print "origin is now at", origin d = DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:, i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) # print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast(locator, origin, origin + rayTransformed * self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1, 0, 0]) else: d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), "rays", colorByName="RGB255") # camera = self.view.camera() # camera.SetFocalPoint(frame.transform.GetPosition()) # camera.SetPosition(frame.transform.TransformPoint((-30,0,10))) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x, y, 0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, raycastDistance=None): if raycastDistance is None: self.setRobotFrameState(self.Car.state[0], self.Car.state[1], self.Car.state[2]) raycastDistance = self.Sensor.raycastAll(self.frame) if np.min(raycastDistance) < self.collisionThreshold: return True else: return False # if raycastDistance[(len(raycastDistance)+1)/2] < self.collisionThreshold: # return True # else: # return False def tick(self): # print timer.elapsed # simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x, y, 0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x, y, 0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax))) idx = min(idx, numSteps - 1) x, y, xdot, ydot = self.stateOverTime[idx] self.setRobotFrameState(x, y, 0) self.sliderMovedByPlayTimer = False def onXVelocityChanged(self, value): self.XVelocity_drawing = value self.onDrawActionSetButton() print "x velocity changed to ", value def onYVelocityChanged(self, value): print value self.YVelocity_drawing = -value self.onDrawActionSetButton() print "y velocity changed to ", -value def onShowSensorsButton(self): print "I pressed the show sensors button" self.setInitialStateAtZero() firstRaycast = np.ones((self.Sensor.numRays, 1)) * 10.0 + np.random.randn(self.Sensor.numRays, 1) * 1.0 self.drawFirstIntersections(self.frame, firstRaycast) def onRandomObstaclesButton(self): print "random obstacles button pressed" self.setInitialStateAtZero() self.world = World.buildLineSegmentTestWorld( percentObsDensity=8.0, circleRadius=self.options["World"]["circleRadius"], nonRandom=False, scale=self.options["World"]["scale"], randomSeed=self.options["World"]["randomSeed"], obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"], ) self.locator = World.buildCellLocator(self.world.visObj.polyData) self.Sensor.setLocator(self.locator) self.updateDrawIntersection(self.frame, locator=self.locator) def onRandomGlobalGoalButton(self): print "random global goal button pressed" self.globalGoal = World.buildGlobalGoal() def onBuildWorldFromRandomObstacles(self): distances = self.Sensor.raycastAll(self.frame) firstRaycastLocations = self.Sensor.invertRaycastsToLocations(self.frame, distances) self.polygon_initial_distances = distances self.polygon_initial_raycastLocations = firstRaycastLocations self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations) self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData) self.locator = self.LineSegmentLocator self.Sensor.setLocator(self.LineSegmentLocator) self.updateDrawIntersection(self.frame) def onFindLocalGoalButton(self): print "find local goal button pressed" local_goal = self.findLocalGoal() print local_goal, "is my local goal" self.localGoal = World.placeLocalGoal(local_goal) def findLocalGoal(self): biggest_gap_width = 0 biggest_gap_start_index = 0 current_gap_width = 0 current_gap_start_index = 0 for index, value in enumerate(self.polygon_initial_distances): # if still in a gap if value == self.Sensor.rayLength: current_gap_width += 1 # else terminate counting for this gap else: if current_gap_width > biggest_gap_width: biggest_gap_width = current_gap_width biggest_gap_start_index = current_gap_start_index current_gap_width = 0 current_gap_start_index = index + 1 if current_gap_width > biggest_gap_width: biggest_gap_width = current_gap_width biggest_gap_start_index = current_gap_start_index middle_index_of_gap = biggest_gap_start_index + biggest_gap_width / 2 return self.polygon_initial_raycastLocations[middle_index_of_gap, :] def onDrawActionSetButton(self): print "drawing action set" # self.ActionSet.computeFinalPositions(self.XVelocity_drawing,self.YVelocity_drawing) self.ActionSet.computeAllPositions(self.XVelocity_drawing, self.YVelocity_drawing) # self.ActionSet.drawActionSetFinal() self.ActionSet.drawActionSetFull() def onRunSimButton(self): self.runBatchSimulation() self.saveToFile("latest") def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print "play" self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print "pause" self.playTimer.stop() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = "data/" + filename + ".out" my_shelf = shelve.open(filename, "n") my_shelf["options"] = self.options my_shelf["simulationData"] = self.simulationData my_shelf["stateOverTime"] = self.stateOverTime my_shelf["raycastData"] = self.raycastData my_shelf["controlInputData"] = self.controlInputData my_shelf["numTimesteps"] = self.numTimesteps my_shelf["idxDict"] = self.idxDict my_shelf["counter"] = self.counter my_shelf.close() def run(self, launchApp=True): self.counter = 1 # for use in playback self.dt = self.options["dt"] self.endTime = self.defaultControllerTime # used to be the sum of the other times as well self.t = np.arange(0.0, self.endTime, self.dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps + 1, 4)) self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays)) self.controlInputData = np.zeros((maxNumTimesteps + 1, 2)) self.numTimesteps = maxNumTimesteps # self.runBatchSimulation() if launchApp: self.setupPlayback() @staticmethod def loadFromFile(filename): filename = "data/" + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf["options"] sim.initialize() sim.simulationData = my_shelf["simulationData"] sim.stateOverTime = np.array(my_shelf["stateOverTime"]) sim.raycastData = np.array(my_shelf["raycastData"]) sim.controlInputData = np.array(my_shelf["controlInputData"]) sim.numTimesteps = my_shelf["numTimesteps"] sim.idxDict = my_shelf["idxDict"] sim.counter = my_shelf["counter"] my_shelf.close() return sim
class Simulator(object): def __init__(self, percentObsDensity=20, endTime=40, randomizeControl=False, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, supervisedTrainingTime=500, autoInitialize=True, verbose=True, sarsaType="discrete"): self.verbose = verbose self.randomizeControl = randomizeControl self.startSimTime = time.time() self.collisionThreshold = 1.3 self.randomSeed = 5 self.Sarsa_numInnerBins = 4 self.Sarsa_numOuterBins = 4 self.Sensor_rayLength = 8 self.sarsaType = sarsaType self.percentObsDensity = percentObsDensity self.supervisedTrainingTime = 10 self.learningRandomTime = 10 self.learningEvalTime = 10 self.defaultControllerTime = 10 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() # view = app.createView(useGrid=False) self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() if autoInitialize: self.initialize() def initializeOptions(self): self.options = dict() self.options['Reward'] = dict() self.options['Reward']['actionCost'] = 0.1 self.options['Reward']['collisionPenalty'] = 100.0 self.options['Reward']['raycastCost'] = 20.0 self.options['SARSA'] = dict() self.options['SARSA']['type'] = "discrete" self.options['SARSA']['lam'] = 0.7 self.options['SARSA']['alphaStepSize'] = 0.2 self.options['SARSA']['useQLearningUpdate'] = False self.options['SARSA']['epsilonGreedy'] = 0.2 self.options['SARSA']['burnInTime'] = 500 self.options['SARSA']['epsilonGreedyExponent'] = 0.3 self.options['SARSA'][ 'exponentialDiscountFactor'] = 0.05 #so gamma = e^(-rho*dt) self.options['SARSA']['numInnerBins'] = 5 self.options['SARSA']['numOuterBins'] = 4 self.options['SARSA']['binCutoff'] = 0.5 self.options['World'] = dict() self.options['World']['obstaclesInnerFraction'] = 0.85 self.options['World']['randomSeed'] = 40 self.options['World']['percentObsDensity'] = 7.5 self.options['World']['nonRandomWorld'] = True self.options['World']['circleRadius'] = 1.75 self.options['World']['scale'] = 1.0 self.options['Sensor'] = dict() self.options['Sensor']['rayLength'] = 10 self.options['Sensor']['numRays'] = 20 self.options['Car'] = dict() self.options['Car']['velocity'] = 12 self.options['dt'] = 0.05 self.options['runTime'] = dict() self.options['runTime']['supervisedTrainingTime'] = 10 self.options['runTime']['learningRandomTime'] = 10 self.options['runTime']['learningEvalTime'] = 10 self.options['runTime']['defaultControllerTime'] = 10 def setDefaultOptions(self): defaultOptions = dict() defaultOptions['Reward'] = dict() defaultOptions['Reward']['actionCost'] = 0.1 defaultOptions['Reward']['collisionPenalty'] = 100.0 defaultOptions['Reward']['raycastCost'] = 20.0 defaultOptions['SARSA'] = dict() defaultOptions['SARSA']['type'] = "discrete" defaultOptions['SARSA']['lam'] = 0.7 defaultOptions['SARSA']['alphaStepSize'] = 0.2 defaultOptions['SARSA']['useQLearningUpdate'] = False defaultOptions['SARSA']['useSupervisedTraining'] = True defaultOptions['SARSA']['epsilonGreedy'] = 0.2 defaultOptions['SARSA']['burnInTime'] = 500 defaultOptions['SARSA']['epsilonGreedyExponent'] = 0.3 defaultOptions['SARSA'][ 'exponentialDiscountFactor'] = 0.05 #so gamma = e^(-rho*dt) defaultOptions['SARSA']['numInnerBins'] = 5 defaultOptions['SARSA']['numOuterBins'] = 4 defaultOptions['SARSA']['binCutoff'] = 0.5 defaultOptions['SARSA']['forceDriveStraight'] = True defaultOptions['World'] = dict() defaultOptions['World']['obstaclesInnerFraction'] = 0.85 defaultOptions['World']['randomSeed'] = 40 defaultOptions['World']['percentObsDensity'] = 7.5 defaultOptions['World']['nonRandomWorld'] = True defaultOptions['World']['circleRadius'] = 1.75 defaultOptions['World']['scale'] = 1.0 defaultOptions['Sensor'] = dict() defaultOptions['Sensor']['rayLength'] = 10 defaultOptions['Sensor']['numRays'] = 20 defaultOptions['Car'] = dict() defaultOptions['Car']['velocity'] = 12 defaultOptions['dt'] = 0.05 defaultOptions['runTime'] = dict() defaultOptions['runTime']['supervisedTrainingTime'] = 10 defaultOptions['runTime']['learningRandomTime'] = 10 defaultOptions['runTime']['learningEvalTime'] = 10 defaultOptions['runTime']['defaultControllerTime'] = 10 for k in defaultOptions: self.options.setdefault(k, defaultOptions[k]) for k in defaultOptions: if not isinstance(defaultOptions[k], dict): continue for j in defaultOptions[k]: self.options[k].setdefault(j, defaultOptions[k][j]) def initializeColorMap(self): self.colorMap = dict() self.colorMap['defaultRandom'] = [0, 0, 1] self.colorMap['learnedRandom'] = [1.0, 0.54901961, 0.] # this is orange self.colorMap['learned'] = [0.58039216, 0.0, 0.82745098] # this is yellow self.colorMap['default'] = [0, 1, 0] def initialize(self): self.dt = self.options['dt'] self.controllerTypeOrder = [ 'defaultRandom', 'learnedRandom', 'learned', 'default' ] self.setDefaultOptions() self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'], numRays=self.options['Sensor']['numRays']) self.Controller = ControllerObj(self.Sensor) self.Car = CarPlant(controller=self.Controller, velocity=self.options['Car']['velocity']) self.Reward = Reward( self.Sensor, collisionThreshold=self.collisionThreshold, actionCost=self.options['Reward']['actionCost'], collisionPenalty=self.options['Reward']['collisionPenalty'], raycastCost=self.options['Reward']['raycastCost']) self.setSARSA() # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName('world')) self.world = World.buildCircleWorld( percentObsDensity=self.options['World']['percentObsDensity'], circleRadius=self.options['World']['circleRadius'], nonRandom=self.options['World']['nonRandomWorld'], scale=self.options['World']['scale'], randomSeed=self.options['World']['randomSeed'], obstaclesInnerFraction=self.options['World'] ['obstaclesInnerFraction']) om.removeFromObjectModel(om.findObjectByName('robot')) self.robot, self.frame = World.buildRobot() self.locator = World.buildCellLocator(self.world.visObj.polyData) self.Sensor.setLocator(self.locator) self.frame = self.robot.getChildFrame() self.frame.setProperty('Scale', 3) self.frame.setProperty('Edit', True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.supervisedTrainingTime = self.options['runTime'][ 'supervisedTrainingTime'] self.learningRandomTime = self.options['runTime']['learningRandomTime'] self.learningEvalTime = self.options['runTime']['learningEvalTime'] self.defaultControllerTime = self.options['runTime'][ 'defaultControllerTime'] self.Car.setFrame(self.frame) print "Finished initialization" def setSARSA(self, type=None): if type is None: type = self.options['SARSA']['type'] if type == "discrete": self.Sarsa = SARSADiscrete( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, collisionThreshold=self.collisionThreshold, alphaStepSize=self.options['SARSA']['alphaStepSize'], useQLearningUpdate=self.options['SARSA']['useQLearningUpdate'], lam=self.options['SARSA']['lam'], numInnerBins=self.options['SARSA']['numInnerBins'], numOuterBins=self.options['SARSA']['numOuterBins'], binCutoff=self.options['SARSA']['binCutoff'], burnInTime=self.options['SARSA']['burnInTime'], epsilonGreedy=self.options['SARSA']['epsilonGreedy'], epsilonGreedyExponent=self.options['SARSA'] ['epsilonGreedyExponent'], forceDriveStraight=self.options['SARSA']['forceDriveStraight']) elif type == "continuous": self.Sarsa = SARSAContinuous( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, lam=self.options['SARSA']['lam'], alphaStepSize=self.options['SARSA']['alphaStepSize'], collisionThreshold=self.collisionThreshold, burnInTime=self.options['SARSA']['burnInTime'], epsilonGreedy=self.options['SARSA']['epsilonGreedy'], epsilonGreedyExponent=self.options['SARSA'] ['epsilonGreedyExponent']) else: raise ValueError( "sarsa type must be either discrete or continuous") def runSingleSimulation(self, updateQValues=True, controllerType='default', simulationCutoff=None): if self.verbose: print "using QValue based controller = ", useQValueController self.setCollisionFreeInitialState() currentCarState = np.copy(self.Car.state) nextCarState = np.copy(self.Car.state) self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) nextRaycast = np.zeros(self.Sensor.numRays) self.Sarsa.resetElibilityTraces() # record the reward data runData = dict() reward = 0 discountedReward = 0 avgReward = 0 startIdx = self.counter while (self.counter < self.numTimesteps - 1): idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx, :] = currentCarState x = self.stateOverTime[idx, 0] y = self.stateOverTime[idx, 1] theta = self.stateOverTime[idx, 2] self.setRobotFrameState(x, y, theta) # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) self.raycastData[idx, :] = currentRaycast S_current = (currentCarState, currentRaycast) if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=True) else: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict[ "learnedRandom"] controlInput, controlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_current, counter=counterForGreedyDecay, randomize=randomizeControl) self.emptyQValue[idx] = emptyQValue if emptyQValue and self.options['SARSA'][ 'useSupervisedTraining']: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False) self.controlInputData[idx] = controlInput nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt) # want to compute nextRaycast so we can do the SARSA algorithm x = nextCarState[0] y = nextCarState[1] theta = nextCarState[2] self.setRobotFrameState(x, y, theta) nextRaycast = self.Sensor.raycastAll(self.frame) # Compute the next control input S_next = (nextCarState, nextRaycast) if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=True) else: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict[ "learnedRandom"] nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_next, counter=counterForGreedyDecay, randomize=randomizeControl) if emptyQValue and self.options['SARSA'][ 'useSupervisedTraining']: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False) # if useQValueController: # nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(S_next) # # if not useQValueController or emptyQValue: # nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState, currentTime, self.frame, # raycastDistance=nextRaycast) # compute the reward reward = self.Reward.computeReward(S_next, controlInput) self.rewardData[idx] = reward discountedReward += self.Sarsa.gamma**(self.counter - startIdx) * reward avgReward += reward ###### SARSA update if updateQValues: self.Sarsa.sarsaUpdate(S_current, controlInputIdx, reward, S_next, nextControlInputIdx) #bookkeeping currentCarState = nextCarState currentRaycast = nextRaycast self.counter += 1 # break if we are in collision if self.checkInCollision(nextRaycast): if self.verbose: print "Had a collision, terminating simulation" break if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter, :] = currentCarState self.raycastData[self.counter, :] = currentRaycast # return the total reward avgRewardNoCollisionPenalty = avgReward - reward avgReward = avgReward * 1.0 / max(1, self.counter - startIdx) avgRewardNoCollisionPenalty = avgRewardNoCollisionPenalty * 1.0 / max( 1, self.counter - startIdx) # this extra multiplication is so that it is in the same "units" as avgReward runData['discountedReward'] = discountedReward * (1 - self.Sarsa.gamma) runData['avgReward'] = avgReward runData['avgRewardNoCollisionPenalty'] = avgRewardNoCollisionPenalty # this just makes sure we don't get stuck in an infinite loop. if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): # for use in playback self.dt = self.options['dt'] self.Sarsa.setDiscountFactor(dt) self.endTime = self.supervisedTrainingTime + self.learningRandomTime + self.learningEvalTime + self.defaultControllerTime self.t = np.arange(0.0, self.endTime, dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3)) self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays)) self.controlInputData = np.zeros(maxNumTimesteps + 1) self.rewardData = np.zeros(maxNumTimesteps + 1) self.emptyQValue = np.zeros(maxNumTimesteps + 1, dtype='bool') self.numTimesteps = maxNumTimesteps self.controllerTypeOrder = [ 'defaultRandom', 'learnedRandom', 'learned', 'default' ] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 # three while loops for different phases of simulation, supervisedTraining, learning, default self.idxDict['defaultRandom'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.supervisedTrainingTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.supervisedTrainingTime / dt) and self.counter < self.numTimesteps): self.printStatusBar() useQValueController = False startIdx = self.counter runData = self.runSingleSimulation(updateQValues=True, controllerType='defaultRandom', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "defaultRandom" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict['learnedRandom'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningRandomTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.learningRandomTime / dt) and self.counter < self.numTimesteps): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(updateQValues=True, controllerType='learnedRandom', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "learnedRandom" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict['learned'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningEvalTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.learningEvalTime / dt) and self.counter < self.numTimesteps): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(updateQValues=False, controllerType='learned', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "learned" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict['default'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(updateQValues=False, controllerType='default', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "default" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0:self.counter + 1, :] self.raycastData = self.raycastData[0:self.counter + 1, :] self.controlInputData = self.controlInputData[0:self.counter + 1] self.rewardData = self.rewardData[0:self.counter + 1] self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * ( self.numTicks - self.nextTickIdx ), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0] y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0] theta = np.random.uniform(0, 2 * np.pi, 1)[0] self.Car.setCarState(x, y, theta) self.setRobotFrameState(x, y, theta) if not self.checkInCollision(): break # if self.checkInCollision(): # print "IN COLLISION" # else: # print "COLLISION FREE" return x, y, theta def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0 / self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) playButton = QtGui.QPushButton('Play/Pause') playButton.connect('clicked()', self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect('valueChanged(int)', self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider l.addWidget(playButton) l.addWidget(slider) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) l.addWidget(panel) w.showMaximized() self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2, 0, -1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter / elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.app.start() def run(self, launchApp=True): self.counter = 1 self.runBatchSimulation() # self.Sarsa.plotWeights() if launchApp: self.setupPlayback() def updateDrawIntersection(self, frame): origin = np.array(frame.transform.GetPosition()) #print "origin is now at", origin d = DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] # if the QValue was empty then color it green if self.emptyQValue[sliderIdx]: colorMaxRange = [1, 1, 0] # this is yellow for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:, i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) #print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast( self.locator, origin, origin + rayTransformed * self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1, 0, 0]) else: d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255') #camera = self.view.camera() #camera.SetFocalPoint(frame.transform.GetPosition()) #camera.SetPosition(frame.transform.TransformPoint((-30,0,10))) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x, y, 0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, raycastDistance=None): if raycastDistance is None: self.setRobotFrameState(self.Car.state[0], self.Car.state[1], self.Car.state[2]) raycastDistance = self.Sensor.raycastAll(self.frame) if np.min(raycastDistance) < self.collisionThreshold: return True else: return False def tick(self): #print timer.elapsed #simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x, y, 0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x, y, 0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax))) idx = min(idx, numSteps - 1) x, y, theta = self.stateOverTime[idx] self.setRobotFrameState(x, y, theta) self.sliderMovedByPlayTimer = False def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print 'play' self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print 'pause' self.playTimer.stop() def computeRunStatistics(self): numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val['startIdx'] runDuration[idx] = val['duration'] discountedReward[idx] = val['discountedReward'] avgReward[idx] = val['avgReward'] avgRewardNoCollisionPenalty[idx] = val[ 'avgRewardNoCollisionPenalty'] controllerType = val['controllerType'] idxMap[controllerType][idx] = True def plotRunData(self, controllerTypeToPlot=None, showPlot=True, onlyLearned=False): if controllerTypeToPlot == None: controllerTypeToPlot = self.colorMap.keys() if onlyLearned: controllerTypeToPlot = ['learnedRandom', 'learned'] numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val['startIdx'] runDuration[idx] = val['duration'] discountedReward[idx] = val['discountedReward'] avgReward[idx] = val['avgReward'] avgRewardNoCollisionPenalty[idx] = val[ 'avgRewardNoCollisionPenalty'] controllerType = val['controllerType'] idxMap[controllerType][idx] = True # usedQValueController[idx] = (val['controllerType'] == "QValue") # usedDefaultController[idx] = (val['controllerType'] == "default") # usedDefaultRandomController[idx] = (val['controllerType'] == "defaultRandom") # usedQValueRandomController[idx] = (val['controllerType'] == "QValueRandom") self.runStatistics = dict() dataMap = { 'duration': runDuration, 'discountedReward': discountedReward, 'avgReward': avgReward, 'avgRewardNoCollisionPenalty': avgRewardNoCollisionPenalty } def computeRunStatistics(dataMap): for controllerType, idx in idxMap.iteritems(): d = dict() for dataName, dataSet in dataMap.iteritems(): # average the appropriate values in dataset d[dataName] = np.sum( dataSet[idx]) / (1.0 * np.size(dataSet[idx])) self.runStatistics[controllerType] = d computeRunStatistics(dataMap) if not showPlot: return plt.figure() # # idxDefaultRandom = np.where(usedDefaultRandomController==True)[0] # idxQValueController = np.where(usedQValueController==True)[0] # idxQValueControllerRandom = np.where(usedQValueControllerRandom==True)[0] # idxDefault = np.where(usedDefaultController==True)[0] # # plotData = dict() # plotData['defaultRandom'] = {'idx': idxDefaultRandom, 'color': 'b'} # plotData['QValue'] = {'idx': idxQValueController, 'color': 'y'} # plotData['default'] = {'idx': idxDefault, 'color': 'g'} def scatterPlot(dataToPlot): for controllerType in controllerTypeToPlot: idx = idxMap[controllerType] plt.scatter(grid[idx], dataToPlot[idx], color=self.colorMap[controllerType]) def barPlot(dataName): plt.title(dataName) barWidth = 0.5 barCounter = 0 index = np.arange(len(controllerTypeToPlot)) for controllerType in controllerTypeToPlot: val = self.runStatistics[controllerType] plt.bar(barCounter, val[dataName], barWidth, color=self.colorMap[controllerType], label=controllerType) barCounter += 1 plt.xticks(index + barWidth / 2.0, controllerTypeToPlot) plt.subplot(4, 1, 1) plt.title('run duration') scatterPlot(runDuration) # for controllerType, idx in idxMap.iteritems(): # plt.scatter(grid[idx], runDuration[idx], color=self.colorMap[controllerType]) # plt.scatter(runStart[idxDefaultRandom], runDuration[idxDefaultRandom], color='b') # plt.scatter(runStart[idxQValueController], runDuration[idxQValueController], color='y') # plt.scatter(runStart[idxDefault], runDuration[idxDefault], color='g') plt.xlabel('run #') plt.ylabel('episode duration') plt.subplot(2, 1, 2) plt.title('discounted reward') scatterPlot(discountedReward) # for key, val in plotData.iteritems(): # plt.scatter(grid[idx], discountedReward[idx], color=self.colorMap[controllerType]) # plt.subplot(3,1,3) # plt.title("average reward") # scatterPlot(avgReward) # for key, val in plotData.iteritems(): # plt.scatter(grid[val['idx']],avgReward[val['idx']], color=val['color']) # plt.subplot(4,1,4) # plt.title("average reward no collision penalty") # scatterPlot(avgRewardNoCollisionPenalty) # # for key, val in plotData.iteritems(): # # plt.scatter(grid[val['idx']],avgRewardNoCollisionPenalty[val['idx']], color=val['color']) ## plot summary statistics plt.figure() plt.subplot(2, 1, 1) barPlot("duration") plt.subplot(2, 1, 2) barPlot("discountedReward") # plt.subplot(3,1,3) # barPlot("avgReward") # # plt.subplot(4,1,4) # barPlot("avgRewardNoCollisionPenalty") plt.show() def plotMultipleRunData(self, simList, toPlot=['duration', 'discountedReward'], controllerType='learned'): plt.figure() numPlots = len(toPlot) grid = np.arange(len(simList)) def plot(fieldToPlot, plotNum): plt.subplot(numPlots, 1, plotNum) plt.title(fieldToPlot) val = 0 * grid barWidth = 0.5 barCounter = 0 for idx, sim in enumerate(simList): value = sim.runStatistics[controllerType][fieldToPlot] plt.bar(idx, value, barWidth) counter = 1 for fieldToPlot in toPlot: plot(fieldToPlot, counter) counter += 1 plt.show() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = 'data/' + filename + ".out" my_shelf = shelve.open(filename, 'n') my_shelf['options'] = self.options if self.options['SARSA']['type'] == "discrete": my_shelf['SARSA_QValues'] = self.Sarsa.QValues my_shelf['simulationData'] = self.simulationData my_shelf['stateOverTime'] = self.stateOverTime my_shelf['raycastData'] = self.raycastData my_shelf['controlInputData'] = self.controlInputData my_shelf['emptyQValue'] = self.emptyQValue my_shelf['numTimesteps'] = self.numTimesteps my_shelf['idxDict'] = self.idxDict my_shelf['counter'] = self.counter my_shelf.close() @staticmethod def loadFromFile(filename): filename = 'data/' + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf['options'] sim.initialize() if sim.options['SARSA']['type'] == "discrete": sim.Sarsa.QValues = np.array(my_shelf['SARSA_QValues']) sim.simulationData = my_shelf['simulationData'] # sim.runStatistics = my_shelf['runStatistics'] sim.stateOverTime = np.array(my_shelf['stateOverTime']) sim.raycastData = np.array(my_shelf['raycastData']) sim.controlInputData = np.array(my_shelf['controlInputData']) sim.emptyQValue = np.array(my_shelf['emptyQValue']) sim.numTimesteps = my_shelf['numTimesteps'] sim.idxDict = my_shelf['idxDict'] sim.counter = my_shelf['counter'] my_shelf.close() return sim
class Simulator(object): def __init__(self, percentObsDensity=20, endTime=40, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, autoInitialize=True, verbose=True): self.verbose = verbose self.startSimTime = time.time() self.collisionThreshold = 1.0 self.randomSeed = 5 self.Sensor_rayLength = 8 self.percentObsDensity = percentObsDensity self.defaultControllerTime = 1000 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() self.initial_XVelocity = 0.0 self.initial_YVelocity = 0.0 self.running_sim = True self.currentIdx = 0; if autoInitialize: self.initialize() def initializeOptions(self): self.options = dict() self.options['World'] = dict() self.options['World']['obstaclesInnerFraction'] = 0.98 self.options['World']['randomSeed'] = 40 self.options['World']['percentObsDensity'] = 25.0 self.options['World']['nonRandomWorld'] = True self.options['World']['circleRadius'] = 0.6 self.circleRadius = self.options['World']['circleRadius'] self.options['World']['scale'] = 2.0 self.options['Sensor'] = dict() self.options['Sensor']['rayLength'] = 10 self.options['Sensor']['numRays'] = 40 self.options['Car'] = dict() self.options['Car']['velocity'] = 20 self.options['dt'] = 0.05 self.options['runTime'] = dict() self.options['runTime']['defaultControllerTime'] = 100 def initializeColorMap(self): self.colorMap = dict() self.colorMap['default'] = [0,1,0] def initialize(self): self.dt = self.options['dt'] self.controllerTypeOrder = ['default'] self.Controller = ControllerObj() self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'], numRays=self.options['Sensor']['numRays']) self.Car = CarPlant(controller=self.Controller, velocity=self.options['Car']['velocity']) self.Controller.initializeVelocity(self.Car.v) self.ActionSet = ActionSetObj() # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName('world')) self.world = World.buildCircleWorld(percentObsDensity=self.options['World']['percentObsDensity'], circleRadius=self.options['World']['circleRadius'], nonRandom=self.options['World']['nonRandomWorld'], scale=self.options['World']['scale'], randomSeed=self.options['World']['randomSeed'], obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction']) om.removeFromObjectModel(om.findObjectByName('robot')) self.robot, self.frame = World.buildRobot() self.locator = World.buildCellLocator(self.world.visObj.polyData) #will need to set locator for purple trajectories #self.Sensor.setLocator(self.locator) self.frame = self.robot.getChildFrame() self.frame.setProperty('Scale', 3) #self.frame.setProperty('Visible', False) #self.frame.setProperty('Edit', True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.defaultControllerTime = self.options['runTime']['defaultControllerTime'] self.Car.setFrame(self.frame) print "Finished initialization" def terminalEuclideanCostForTrajectories(self): # access the trajectories x_traj = self.ActionSet.p_x_trajectories y_traj = self.ActionSet.p_y_trajectories final_x_positions = x_traj[:,-1] final_y_positions = y_traj[:,-1] #access the global goal global_goal_x = self.globalGoal.global_goal_x global_goal_y = self.globalGoal.global_goal_y euclideans_vector = [] # def getKey(item): # return item[1] for i in xrange(self.ActionSet.num_x_bins): for j in xrange(self.ActionSet.num_y_bins): distance = np.sqrt((final_x_positions[i] - global_goal_x)**2 + (final_y_positions[j] - global_goal_y)**2) euclideans_vector.append(distance) #sorted_ranks_with_indices = sorted(ranks_with_indices,key=getKey) return np.array(euclideans_vector) def CheckIfCollisionFreeTrajectoryIndex(self, traj_index): # accessing the right trajectory x_trajectory_to_check = self.ActionSet.p_x_trajectories[traj_index[0]] y_trajectory_to_check = self.ActionSet.p_y_trajectories[traj_index[1]] #check if anything is too close to the center of the circles #print self.world.list_of_circles for i in range(len(x_trajectory_to_check)): point = ( x_trajectory_to_check[i], y_trajectory_to_check[i] ) if not self.CheckIfCollisionFreePoint(point): #print "trajectory wasn't free" return False return True def CheckIfCollisionFreePoint(self, point): for circle_center in self.world.list_of_circles: #print "circle centers first" #print circle_center[0], circle_center[1] #print self.circleRadius #print point[0], point[1] #print (circle_center[0] - point[0])**2 + (circle_center[1]-point[1])**2 if ( (circle_center[0] - point[0])**2 + (circle_center[1]-point[1])**2 < self.circleRadius**2): #print "I found a point in a circle" return False if point[0] > self.world.Xmax: #print "I would have gone past top wall" return False if point[0] < self.world.Xmin: #print "I would have gone below bottom wall" return False if point[1] > self.world.Ymax: #print "I would have gone to the right of the side wall" return False if point[1] < self.world.Ymin: #print "I would have gone to the left of the side wall" return False return True def computeProbabilitiesOfCollisionAllTrajectories(self, currentRaycastIntersectionLocations, speed_allowed_matrix): probability_vector = [] indices_vector = [] for x_index in xrange(self.ActionSet.num_x_bins): for y_index in xrange(self.ActionSet.num_y_bins): if not speed_allowed_matrix[x_index, y_index]: probability_vector.append(1.0) else: probability_of_collision = self.computeProbabilityOfCollisionOneTrajectory(x_index, y_index, currentRaycastIntersectionLocations) probability_vector.append(probability_of_collision) indices_vector.append([x_index, y_index]) return np.array(probability_vector), indices_vector def computeProbabilityOfCollisionOneTrajectory(self, x_index, y_index, currentRaycastIntersectionLocations): probability_no_collision = 1 for time_step_index, time_step_value in enumerate(self.ActionSet.t_vector): for obstacle_center in currentRaycastIntersectionLocations: probability_of_collision_step_k_obstacle_j = self.computeProbabilityOfCollisionOneStepOneObstacle(x_index, y_index, time_step_index, time_step_value, obstacle_center) probability_no_collision_step_k_obstacle_j = 1 - probability_of_collision_step_k_obstacle_j probability_no_collision = probability_no_collision * probability_no_collision_step_k_obstacle_j return float(1.0 - probability_no_collision) def computeProbabilityOfCollisionOneStepOneObstacle(self, x_index, y_index, time_step_index, time_step_value, obstacle_center): volume = 4.18 Sigma_sensor = np.zeros((3,3)) np.fill_diagonal(Sigma_sensor, self.sigma_sensor) variance_x = (2.5 + abs(self.current_initial_velocity_x*0.1))*time_step_value+0.01 variance_y = (2.5 + abs(self.current_initial_velocity_y*0.1))*time_step_value+0.01 variance_z = (1.5)*time_step_value+0.01 Sigma_robot = np.zeros((3,3)) np.fill_diagonal(Sigma_sensor, [variance_x, variance_y, variance_z]) Sigma_C = Sigma_robot + Sigma_sensor denominator = np.sqrt(np.linalg.det(2*np.pi*Sigma_C)) x = self.ActionSet.p_x_trajectories[x_index, time_step_index] y = self.ActionSet.p_y_trajectories[y_index, time_step_index] z = 0 x_robot = np.array([x, y, z]) obstacle_center = np.array(obstacle_center) exponent = - 0.5 * np.matrix((x_robot - obstacle_center)) * np.matrix(np.linalg.inv(Sigma_C)) * np.matrix((x_robot - obstacle_center)).T return volume / denominator * np.exp(exponent) def computeJerkVector(self,controlInput): jerk_vector = [] for x_index in xrange(self.ActionSet.num_x_bins): for y_index in xrange(self.ActionSet.num_y_bins): last_x_accel = controlInput[0] last_y_accel = controlInput[1] this_x_accel = self.ActionSet.a_x[x_index] this_y_accel = self.ActionSet.a_y[y_index] jerk = np.sqrt( (last_x_accel - this_x_accel)**2 + (last_y_accel - this_y_accel)**2 ) jerk_vector.append(jerk) return np.array(jerk_vector) def identifySpeedAllowedTrajectories(self): speed_allowed_matrix = np.ones((self.ActionSet.num_x_bins, self.ActionSet.num_y_bins)) # for x_index in xrange(self.ActionSet.num_x_bins): # for y_index in xrange(self.ActionSet.num_y_bins): # this_x_accel = self.ActionSet.a_x[x_index] # this_y_accel = self.ActionSet.a_y[y_index] # if np.sqrt( (self.current_initial_velocity_x - this_x_accel)**2 + (self.current_initial_velocity_y - this_y_accel)**2) < self.speed_max: # speed_allowed_matrix[x_index,y_index] = 1 return speed_allowed_matrix def runSingleSimulation(self, controllerType='default', simulationCutoff=None): self.Car.setCarState(0.0,0.0,self.initial_XVelocity,self.initial_YVelocity) self.setRobotFrameState(0.0,0.0,0.0) currentCarState = np.copy(self.Car.state) nextCarState = np.copy(self.Car.state) self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2]) self.Sensor.setLocator(self.locator) firstRaycast = self.Sensor.raycastAll(self.frame) firstRaycastLocations = self.Sensor.raycastAllLocations(self.frame) nextRaycast = np.zeros(self.Sensor.numRays) # record the reward data runData = dict() startIdx = self.counter controlInput = [0,0] while (self.counter < self.numTimesteps - 1): idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx,:] = currentCarState x = self.stateOverTime[idx,0] y = self.stateOverTime[idx,1] self.setRobotFrameState(x,y,0.0) # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycastIntersectionLocations = self.Sensor.raycastLocationsOnlyOfIntersections(self.frame) if self.checkInCollision(currentCarState[0], currentCarState[1], currentRaycastIntersectionLocations): break if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") # compute all trajectories from action set self.ActionSet.computeAllPositions(currentCarState[0], currentCarState[1], currentCarState[2],currentCarState[3]) self.current_initial_velocity_x = currentCarState[2] self.current_initial_velocity_y = currentCarState[3] speed_allowed_matrix = self.identifySpeedAllowedTrajectories() probability_vector, indices_list = self.computeProbabilitiesOfCollisionAllTrajectories(currentRaycastIntersectionLocations, speed_allowed_matrix) euclideans_vector = self.terminalEuclideanCostForTrajectories() jerk_vector = self.computeJerkVector(controlInput) # k_collision_cost = 10 # k_terminal_cost = 1 # k_jerk = 0.1 # sum_vector = probability_vector*k_collision_cost + euclideans_vector/np.max(euclideans_vector)*k_terminal_cost + k_jerk*jerk_vector/np.max(jerk_vector) # min_action_index_in_vector = np.argmin(sum_vector) # x_index_to_use = indices_list[min_action_index_in_vector][0] # y_index_to_use = indices_list[min_action_index_in_vector][1] # convert to probability no collision probability_vector = np.ones(len(probability_vector)) - probability_vector # convert distances to amount of progress current_distance = np.sqrt((currentCarState[0] - self.globalGoal.global_goal_x)**2 + (currentCarState[1] - self.globalGoal.global_goal_y)**2) #print "euclideans vector", euclideans_vector euclidean_progress_vector = np.ones(len(euclideans_vector))*current_distance - euclideans_vector #print "euclidean progress vector", euclidean_progress_vector reward_vector = euclidean_progress_vector - 0.1*jerk_vector/np.max(jerk_vector) #print "reward_vector", reward_vector expected_reward = np.multiply(probability_vector, reward_vector) max_action_index_in_vector = np.argmax(expected_reward) x_index_to_use = indices_list[max_action_index_in_vector][0] y_index_to_use = indices_list[max_action_index_in_vector][1] self.actionIndicesOverTime[idx,:] = [x_index_to_use, y_index_to_use] x_accel = self.ActionSet.a_x[x_index_to_use] y_accel = self.ActionSet.a_y[y_index_to_use] controlInput = [x_accel, y_accel] self.controlInputData[idx] = controlInput nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt) x = nextCarState[0] y = nextCarState[1] theta = nextCarState[2] self.setRobotFrameState(x,y,theta) #bookkeeping currentCarState = nextCarState self.counter+=1 # break if we are in collision if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter,:] = currentCarState self.actionIndicesOverTime[self.counter,:] = [x_index_to_use, y_index_to_use] # this just makes sure we don't get stuck in an infinite loop. if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): # for use in playback self.dt = self.options['dt'] self.endTime = self.defaultControllerTime # used to be the sum of the other times as well self.t = np.arange(0.0, self.endTime, dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps+1, 4)) self.actionIndicesOverTime = np.zeros((maxNumTimesteps+1, 2)) self.controlInputData = np.zeros((maxNumTimesteps+1,2)) self.numTimesteps = maxNumTimesteps self.controllerTypeOrder = ['default'] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 self.idxDict['default'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime/dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.defaultControllerTime/dt) and self.counter < self.numTimesteps-1): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(controllerType='default', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "default" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter+=1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0:self.counter+1, :] self.actionIndicesOverTime = self.actionIndicesOverTime[0:self.counter+1, :] self.controlInputData = self.controlInputData[0:self.counter+1] self.endTime = 1.0*self.counter/self.numTimesteps*self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * (self.numTicks - self.nextTickIdx), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = 0.0 y = -5.0 theta = 0 #+ np.random.uniform(0,2*np.pi,1)[0] * 0.01 self.Car.setCarState(x,y,theta) self.setRobotFrameState(x,y,theta) print "In loop" if not self.checkInCollision(): break return x,y,theta def setRandomCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin+tol, self.world.Xmax-tol, 1)[0] y = np.random.uniform(self.world.Ymin+tol, self.world.Ymax-tol, 1)[0] #theta = np.random.uniform(0,2*np.pi,1)[0] theta = 0 #always forward self.Car.setCarState(x,y,self.initial_XVelocity,self.initial_YVelocity) self.setRobotFrameState(x,y,theta) if not self.checkInCollision(): break self.firstRandomCollisionFreeInitialState_x = x self.firstRandomCollisionFreeInitialState_y = y return x,y,0,0 def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0/self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) self.max_velocity = 20.0 sliderXVelocity = QtGui.QSlider(QtCore.Qt.Horizontal) sliderXVelocity.connect('valueChanged(int)', self.onXVelocityChanged) sliderXVelocity.setMaximum(self.max_velocity) sliderXVelocity.setMinimum(-self.max_velocity) l.addWidget(sliderXVelocity) sliderYVelocity = QtGui.QSlider(QtCore.Qt.Horizontal) sliderYVelocity.connect('valueChanged(int)', self.onYVelocityChanged) sliderYVelocity.setMaximum(self.max_velocity) sliderYVelocity.setMinimum(-self.max_velocity) l.addWidget(sliderYVelocity) randomGlobalGoalButton = QtGui.QPushButton('Initialize Random Global Goal') randomGlobalGoalButton.connect('clicked()', self.onRandomGlobalGoalButton) l.addWidget(randomGlobalGoalButton) drawActionSetButton = QtGui.QPushButton('Draw Action Set') drawActionSetButton.connect('clicked()', self.onDrawActionSetButton) l.addWidget(drawActionSetButton) runSimButton = QtGui.QPushButton('Simulate') runSimButton.connect('clicked()', self.onRunSimButton) l.addWidget(runSimButton) playButton = QtGui.QPushButton('Play/Pause') playButton.connect('clicked()', self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect('valueChanged(int)', self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider l.addWidget(playButton) l.addWidget(slider) toggleSensorNoiseButton = QtGui.QPushButton('Toggle Sensor Noise') toggleSensorNoiseButton.connect('clicked()', self.onToggleSensorNoise) l.addWidget(toggleSensorNoiseButton) showObstaclesButton = QtGui.QPushButton('Show Obsatacles') showObstaclesButton.connect('clicked()', self.onShowObstacles) l.addWidget(showObstaclesButton) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) self.view.orientationMarkerWidget().Off() l.addWidget(panel) w.showMaximized() # need to connect frames with DrawActionSet self.frame.connectFrameModified(self.updateDrawActionSet) self.updateDrawActionSet(self.frame) self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2,0,-1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() cameracontrolpanel.CameraControlPanel(self.view).widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter/elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.app.start() def run(self, launchApp=True): self.sphere_toggle = False self.sigma_sensor = 0.5 self.speed_max = 10.0 self.running_sim = True self.onRandomGlobalGoalButton() self.counter = 1 self.runBatchSimulation() self.running_sim = False if launchApp: self.setupPlayback() def onToggleSensorNoise(self): self.sphere_toggle = not self.sphere_toggle def onShowObstacles(self): print "time to show obstacles" A = World.buildCircleWorld(percentObsDensity=self.options['World']['percentObsDensity'], circleRadius=self.options['World']['circleRadius'], nonRandom=self.options['World']['nonRandomWorld'], scale=self.options['World']['scale'], randomSeed=self.options['World']['randomSeed'], obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction'], alpha=1.0) def drawFirstIntersections(self, frame, firstRaycast): origin = np.array(frame.transform.GetPosition()) d = DebugData() firstRaycastLocations = self.Sensor.invertRaycastsToLocations(self.frame, firstRaycast) for i in xrange(self.Sensor.numRays): endpoint = firstRaycastLocations[i,:] if firstRaycast[i] >= self.Sensor.rayLength - 0.1: d.addLine(origin, endpoint, color=[0,1,0]) else: d.addLine(origin, endpoint, color=[1,0,0]) vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255') self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations) self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData) self.locator = self.LineSegmentLocator self.Sensor.setLocator(self.LineSegmentLocator) def updateDrawIntersection(self, frame, locator="None"): if locator=="None": locator = self.locator if self.sphere_toggle: sphere_radius=self.sigma_sensor else: sphere_radius=0.0 origin = np.array(frame.transform.GetPosition()) #print "origin is now at", origin d = DebugData() d_sphere=DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:,i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) #print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast(locator, origin, origin + rayTransformed*self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1,0,0]) d_sphere.addSphere(intersection, radius=sphere_radius, color=[1,0,0]) else: d.addLine(origin, origin+rayTransformed*self.Sensor.rayLength, color=colorMaxRange) d_sphere.addSphere(origin, radius=0.0, color=[0,1,0]) vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255') vis.updatePolyData(d_sphere.getPolyData(), 'spheres', colorByName='RGB255', alpha=0.3) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x,y,0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, x, y, raycastLocations): return False # carState = [x,y,0] # for raycastLocation in raycastLocations: # if np.linalg.norm(carState - raycastLocation) < 0.3: # return True # return False def updateDrawActionSet(self, frame): origin = np.array(frame.transform.GetPosition()) if not self.running_sim: self.ActionSet.computeAllPositions(self.stateOverTime[self.currentIdx,0], self.stateOverTime[self.currentIdx,1], self.stateOverTime[self.currentIdx,2],self.stateOverTime[self.currentIdx,3]) #self.ActionSet.drawActionSetFinal() self.ActionSet.drawActionSetFull() self.drawChosenFunnel() def drawChosenFunnel(self): velocity_initial_x = self.stateOverTime[self.currentIdx,2] velocity_initial_y = self.stateOverTime[self.currentIdx,3] variance_x = 2.5 + abs(velocity_initial_x*0.1) variance_y = 2.5 + abs(velocity_initial_y*0.1) variance_z = 1.5 x_index = self.actionIndicesOverTime[self.currentIdx,0] y_index = self.actionIndicesOverTime[self.currentIdx,1] for i in xrange(0,10): time = 0.5/10.0*i x_center = self.ActionSet.p_x_trajectories[x_index, i] y_center = self.ActionSet.p_y_trajectories[y_index, i] z_center = 0.0 World.buildEllipse(i, [x_center,y_center,z_center], variance_x*time, variance_y*time, variance_z*time, alpha=0.3) def tick(self): #print timer.elapsed #simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x,y,0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x,y,0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onXVelocityChanged(self, value): self.initial_XVelocity = value print "initial x velocity changed to ", value def onYVelocityChanged(self, value): self.initial_YVelocity = -value print "initial y velocity changed to ", -value def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps*(1.0*value/self.sliderMax))) idx = min(idx, numSteps-1) self.currentIdx = idx x,y, xdot, ydot = self.stateOverTime[idx] self.setRobotFrameState(x,y,0) self.sliderMovedByPlayTimer = False def onRandomGlobalGoalButton(self): print "random global goal button pressed" self.globalGoal = World.buildGlobalGoal(scale=self.options['World']['scale']) def onDrawActionSetButton(self): print "drawing action set" #self.ActionSet.computeFinalPositions(self.XVelocity_drawing,self.YVelocity_drawing) self.ActionSet.computeAllPositions(self.Car.state[0], self.Car.state[1], self.initial_XVelocity, self.initial_YVelocity) #self.ActionSet.drawActionSetFinal() self.ActionSet.drawActionSetFull() def onRunSimButton(self): self.running_sim = True self.runBatchSimulation() self.running_sim = False #self.saveToFile("latest") def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print 'play' self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print 'pause' self.playTimer.stop() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = 'data/' + filename + ".out" my_shelf = shelve.open(filename,'n') my_shelf['options'] = self.options my_shelf['simulationData'] = self.simulationData my_shelf['stateOverTime'] = self.stateOverTime my_shelf['controlInputData'] = self.controlInputData my_shelf['numTimesteps'] = self.numTimesteps my_shelf['idxDict'] = self.idxDict my_shelf['counter'] = self.counter my_shelf.close() @staticmethod def loadFromFile(filename): filename = 'data/' + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf['options'] sim.initialize() sim.simulationData = my_shelf['simulationData'] sim.stateOverTime = np.array(my_shelf['stateOverTime']) sim.controlInputData = np.array(my_shelf['controlInputData']) sim.numTimesteps = my_shelf['numTimesteps'] sim.idxDict = my_shelf['idxDict'] sim.counter = my_shelf['counter'] my_shelf.close() return sim
class Simulator(object): def __init__( self, percentObsDensity=20, endTime=40, randomizeControl=False, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, supervisedTrainingTime=500, autoInitialize=True, verbose=True, sarsaType="discrete", ): self.verbose = verbose self.randomizeControl = randomizeControl self.startSimTime = time.time() self.collisionThreshold = 1.3 self.randomSeed = 5 self.Sarsa_numInnerBins = 4 self.Sarsa_numOuterBins = 4 self.Sensor_rayLength = 8 self.sarsaType = sarsaType self.percentObsDensity = percentObsDensity self.supervisedTrainingTime = 10 self.learningRandomTime = 10 self.learningEvalTime = 10 self.defaultControllerTime = 10 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() # view = app.createView(useGrid=False) self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() if autoInitialize: self.initialize() def initializeOptions(self): self.options = dict() self.options["Reward"] = dict() self.options["Reward"]["actionCost"] = 0.1 self.options["Reward"]["collisionPenalty"] = 100.0 self.options["Reward"]["raycastCost"] = 20.0 self.options["SARSA"] = dict() self.options["SARSA"]["type"] = "discrete" self.options["SARSA"]["lam"] = 0.7 self.options["SARSA"]["alphaStepSize"] = 0.2 self.options["SARSA"]["useQLearningUpdate"] = False self.options["SARSA"]["epsilonGreedy"] = 0.2 self.options["SARSA"]["burnInTime"] = 500 self.options["SARSA"]["epsilonGreedyExponent"] = 0.3 self.options["SARSA"]["exponentialDiscountFactor"] = 0.05 # so gamma = e^(-rho*dt) self.options["SARSA"]["numInnerBins"] = 5 self.options["SARSA"]["numOuterBins"] = 4 self.options["SARSA"]["binCutoff"] = 0.5 self.options["World"] = dict() self.options["World"]["obstaclesInnerFraction"] = 0.85 self.options["World"]["randomSeed"] = 40 self.options["World"]["percentObsDensity"] = 7.5 self.options["World"]["nonRandomWorld"] = True self.options["World"]["circleRadius"] = 1.75 self.options["World"]["scale"] = 1.0 self.options["Sensor"] = dict() self.options["Sensor"]["rayLength"] = 10 self.options["Sensor"]["numRays"] = 20 self.options["Car"] = dict() self.options["Car"]["velocity"] = 12 self.options["dt"] = 0.05 self.options["runTime"] = dict() self.options["runTime"]["supervisedTrainingTime"] = 10 self.options["runTime"]["learningRandomTime"] = 10 self.options["runTime"]["learningEvalTime"] = 10 self.options["runTime"]["defaultControllerTime"] = 10 def setDefaultOptions(self): defaultOptions = dict() defaultOptions["Reward"] = dict() defaultOptions["Reward"]["actionCost"] = 0.1 defaultOptions["Reward"]["collisionPenalty"] = 100.0 defaultOptions["Reward"]["raycastCost"] = 20.0 defaultOptions["SARSA"] = dict() defaultOptions["SARSA"]["type"] = "discrete" defaultOptions["SARSA"]["lam"] = 0.7 defaultOptions["SARSA"]["alphaStepSize"] = 0.2 defaultOptions["SARSA"]["useQLearningUpdate"] = False defaultOptions["SARSA"]["useSupervisedTraining"] = True defaultOptions["SARSA"]["epsilonGreedy"] = 0.2 defaultOptions["SARSA"]["burnInTime"] = 500 defaultOptions["SARSA"]["epsilonGreedyExponent"] = 0.3 defaultOptions["SARSA"]["exponentialDiscountFactor"] = 0.05 # so gamma = e^(-rho*dt) defaultOptions["SARSA"]["numInnerBins"] = 5 defaultOptions["SARSA"]["numOuterBins"] = 4 defaultOptions["SARSA"]["binCutoff"] = 0.5 defaultOptions["SARSA"]["forceDriveStraight"] = True defaultOptions["World"] = dict() defaultOptions["World"]["obstaclesInnerFraction"] = 0.85 defaultOptions["World"]["randomSeed"] = 40 defaultOptions["World"]["percentObsDensity"] = 7.5 defaultOptions["World"]["nonRandomWorld"] = True defaultOptions["World"]["circleRadius"] = 1.75 defaultOptions["World"]["scale"] = 1.0 defaultOptions["Sensor"] = dict() defaultOptions["Sensor"]["rayLength"] = 10 defaultOptions["Sensor"]["numRays"] = 20 defaultOptions["Car"] = dict() defaultOptions["Car"]["velocity"] = 12 defaultOptions["dt"] = 0.05 defaultOptions["runTime"] = dict() defaultOptions["runTime"]["supervisedTrainingTime"] = 10 defaultOptions["runTime"]["learningRandomTime"] = 10 defaultOptions["runTime"]["learningEvalTime"] = 10 defaultOptions["runTime"]["defaultControllerTime"] = 10 for k in defaultOptions: self.options.setdefault(k, defaultOptions[k]) for k in defaultOptions: if not isinstance(defaultOptions[k], dict): continue for j in defaultOptions[k]: self.options[k].setdefault(j, defaultOptions[k][j]) def initializeColorMap(self): self.colorMap = dict() self.colorMap["defaultRandom"] = [0, 0, 1] self.colorMap["learnedRandom"] = [1.0, 0.54901961, 0.0] # this is orange self.colorMap["learned"] = [0.58039216, 0.0, 0.82745098] # this is yellow self.colorMap["default"] = [0, 1, 0] def initialize(self): self.dt = self.options["dt"] self.controllerTypeOrder = ["defaultRandom", "learnedRandom", "learned", "default"] self.setDefaultOptions() self.Sensor = SensorObj( rayLength=self.options["Sensor"]["rayLength"], numRays=self.options["Sensor"]["numRays"] ) self.Controller = ControllerObj(self.Sensor) self.Car = CarPlant(controller=self.Controller, velocity=self.options["Car"]["velocity"]) self.Reward = Reward( self.Sensor, collisionThreshold=self.collisionThreshold, actionCost=self.options["Reward"]["actionCost"], collisionPenalty=self.options["Reward"]["collisionPenalty"], raycastCost=self.options["Reward"]["raycastCost"], ) self.setSARSA() # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName("world")) self.world = World.buildCircleWorld( percentObsDensity=self.options["World"]["percentObsDensity"], circleRadius=self.options["World"]["circleRadius"], nonRandom=self.options["World"]["nonRandomWorld"], scale=self.options["World"]["scale"], randomSeed=self.options["World"]["randomSeed"], obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"], ) om.removeFromObjectModel(om.findObjectByName("robot")) self.robot, self.frame = World.buildRobot() self.locator = World.buildCellLocator(self.world.visObj.polyData) self.Sensor.setLocator(self.locator) self.frame = self.robot.getChildFrame() self.frame.setProperty("Scale", 3) self.frame.setProperty("Edit", True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.supervisedTrainingTime = self.options["runTime"]["supervisedTrainingTime"] self.learningRandomTime = self.options["runTime"]["learningRandomTime"] self.learningEvalTime = self.options["runTime"]["learningEvalTime"] self.defaultControllerTime = self.options["runTime"]["defaultControllerTime"] self.Car.setFrame(self.frame) print "Finished initialization" def setSARSA(self, type=None): if type is None: type = self.options["SARSA"]["type"] if type == "discrete": self.Sarsa = SARSADiscrete( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, collisionThreshold=self.collisionThreshold, alphaStepSize=self.options["SARSA"]["alphaStepSize"], useQLearningUpdate=self.options["SARSA"]["useQLearningUpdate"], lam=self.options["SARSA"]["lam"], numInnerBins=self.options["SARSA"]["numInnerBins"], numOuterBins=self.options["SARSA"]["numOuterBins"], binCutoff=self.options["SARSA"]["binCutoff"], burnInTime=self.options["SARSA"]["burnInTime"], epsilonGreedy=self.options["SARSA"]["epsilonGreedy"], epsilonGreedyExponent=self.options["SARSA"]["epsilonGreedyExponent"], forceDriveStraight=self.options["SARSA"]["forceDriveStraight"], ) elif type == "continuous": self.Sarsa = SARSAContinuous( sensorObj=self.Sensor, actionSet=self.Controller.actionSet, lam=self.options["SARSA"]["lam"], alphaStepSize=self.options["SARSA"]["alphaStepSize"], collisionThreshold=self.collisionThreshold, burnInTime=self.options["SARSA"]["burnInTime"], epsilonGreedy=self.options["SARSA"]["epsilonGreedy"], epsilonGreedyExponent=self.options["SARSA"]["epsilonGreedyExponent"], ) else: raise ValueError("sarsa type must be either discrete or continuous") def runSingleSimulation(self, updateQValues=True, controllerType="default", simulationCutoff=None): if self.verbose: print "using QValue based controller = ", useQValueController self.setCollisionFreeInitialState() currentCarState = np.copy(self.Car.state) nextCarState = np.copy(self.Car.state) self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) nextRaycast = np.zeros(self.Sensor.numRays) self.Sarsa.resetElibilityTraces() # record the reward data runData = dict() reward = 0 discountedReward = 0 avgReward = 0 startIdx = self.counter while self.counter < self.numTimesteps - 1: idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx, :] = currentCarState x = self.stateOverTime[idx, 0] y = self.stateOverTime[idx, 1] theta = self.stateOverTime[idx, 2] self.setRobotFrameState(x, y, theta) # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) self.raycastData[idx, :] = currentRaycast S_current = (currentCarState, currentRaycast) if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=True ) else: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False ) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict["learnedRandom"] controlInput, controlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_current, counter=counterForGreedyDecay, randomize=randomizeControl ) self.emptyQValue[idx] = emptyQValue if emptyQValue and self.options["SARSA"]["useSupervisedTraining"]: controlInput, controlInputIdx = self.Controller.computeControlInput( currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False ) self.controlInputData[idx] = controlInput nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt) # want to compute nextRaycast so we can do the SARSA algorithm x = nextCarState[0] y = nextCarState[1] theta = nextCarState[2] self.setRobotFrameState(x, y, theta) nextRaycast = self.Sensor.raycastAll(self.frame) # Compute the next control input S_next = (nextCarState, nextRaycast) if controllerType in ["default", "defaultRandom"]: if controllerType == "defaultRandom": nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=True ) else: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False ) if controllerType in ["learned", "learnedRandom"]: if controllerType == "learned": randomizeControl = False else: randomizeControl = True counterForGreedyDecay = self.counter - self.idxDict["learnedRandom"] nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy( S_next, counter=counterForGreedyDecay, randomize=randomizeControl ) if emptyQValue and self.options["SARSA"]["useSupervisedTraining"]: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False ) # if useQValueController: # nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(S_next) # # if not useQValueController or emptyQValue: # nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState, currentTime, self.frame, # raycastDistance=nextRaycast) # compute the reward reward = self.Reward.computeReward(S_next, controlInput) self.rewardData[idx] = reward discountedReward += self.Sarsa.gamma ** (self.counter - startIdx) * reward avgReward += reward ###### SARSA update if updateQValues: self.Sarsa.sarsaUpdate(S_current, controlInputIdx, reward, S_next, nextControlInputIdx) # bookkeeping currentCarState = nextCarState currentRaycast = nextRaycast self.counter += 1 # break if we are in collision if self.checkInCollision(nextRaycast): if self.verbose: print "Had a collision, terminating simulation" break if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter, :] = currentCarState self.raycastData[self.counter, :] = currentRaycast # return the total reward avgRewardNoCollisionPenalty = avgReward - reward avgReward = avgReward * 1.0 / max(1, self.counter - startIdx) avgRewardNoCollisionPenalty = avgRewardNoCollisionPenalty * 1.0 / max(1, self.counter - startIdx) # this extra multiplication is so that it is in the same "units" as avgReward runData["discountedReward"] = discountedReward * (1 - self.Sarsa.gamma) runData["avgReward"] = avgReward runData["avgRewardNoCollisionPenalty"] = avgRewardNoCollisionPenalty # this just makes sure we don't get stuck in an infinite loop. if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): # for use in playback self.dt = self.options["dt"] self.Sarsa.setDiscountFactor(dt) self.endTime = ( self.supervisedTrainingTime + self.learningRandomTime + self.learningEvalTime + self.defaultControllerTime ) self.t = np.arange(0.0, self.endTime, dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3)) self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays)) self.controlInputData = np.zeros(maxNumTimesteps + 1) self.rewardData = np.zeros(maxNumTimesteps + 1) self.emptyQValue = np.zeros(maxNumTimesteps + 1, dtype="bool") self.numTimesteps = maxNumTimesteps self.controllerTypeOrder = ["defaultRandom", "learnedRandom", "learned", "default"] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 # three while loops for different phases of simulation, supervisedTraining, learning, default self.idxDict["defaultRandom"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.supervisedTrainingTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.supervisedTrainingTime / dt) and self.counter < self.numTimesteps: self.printStatusBar() useQValueController = False startIdx = self.counter runData = self.runSingleSimulation( updateQValues=True, controllerType="defaultRandom", simulationCutoff=simCutoff ) runData["startIdx"] = startIdx runData["controllerType"] = "defaultRandom" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict["learnedRandom"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningRandomTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.learningRandomTime / dt) and self.counter < self.numTimesteps: self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation( updateQValues=True, controllerType="learnedRandom", simulationCutoff=simCutoff ) runData["startIdx"] = startIdx runData["controllerType"] = "learnedRandom" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict["learned"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.learningEvalTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.learningEvalTime / dt) and self.counter < self.numTimesteps: self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation( updateQValues=False, controllerType="learned", simulationCutoff=simCutoff ) runData["startIdx"] = startIdx runData["controllerType"] = "learned" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) self.idxDict["default"] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps) while (self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1: self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation( updateQValues=False, controllerType="default", simulationCutoff=simCutoff ) runData["startIdx"] = startIdx runData["controllerType"] = "default" runData["duration"] = self.counter - runData["startIdx"] runData["endIdx"] = self.counter runData["runNumber"] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0 : self.counter + 1, :] self.raycastData = self.raycastData[0 : self.counter + 1, :] self.controlInputData = self.controlInputData[0 : self.counter + 1] self.rewardData = self.rewardData[0 : self.counter + 1] self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * ( self.numTicks - self.nextTickIdx ), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0] y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0] theta = np.random.uniform(0, 2 * np.pi, 1)[0] self.Car.setCarState(x, y, theta) self.setRobotFrameState(x, y, theta) if not self.checkInCollision(): break # if self.checkInCollision(): # print "IN COLLISION" # else: # print "COLLISION FREE" return x, y, theta def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0 / self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) playButton = QtGui.QPushButton("Play/Pause") playButton.connect("clicked()", self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect("valueChanged(int)", self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider l.addWidget(playButton) l.addWidget(slider) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) l.addWidget(panel) w.showMaximized() self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2, 0, -1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter / elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.app.start() def run(self, launchApp=True): self.counter = 1 self.runBatchSimulation() # self.Sarsa.plotWeights() if launchApp: self.setupPlayback() def updateDrawIntersection(self, frame): origin = np.array(frame.transform.GetPosition()) # print "origin is now at", origin d = DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] # if the QValue was empty then color it green if self.emptyQValue[sliderIdx]: colorMaxRange = [1, 1, 0] # this is yellow for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:, i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) # print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast(self.locator, origin, origin + rayTransformed * self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1, 0, 0]) else: d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), "rays", colorByName="RGB255") # camera = self.view.camera() # camera.SetFocalPoint(frame.transform.GetPosition()) # camera.SetPosition(frame.transform.TransformPoint((-30,0,10))) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x, y, 0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, raycastDistance=None): if raycastDistance is None: self.setRobotFrameState(self.Car.state[0], self.Car.state[1], self.Car.state[2]) raycastDistance = self.Sensor.raycastAll(self.frame) if np.min(raycastDistance) < self.collisionThreshold: return True else: return False def tick(self): # print timer.elapsed # simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x, y, 0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x, y, 0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax))) idx = min(idx, numSteps - 1) x, y, theta = self.stateOverTime[idx] self.setRobotFrameState(x, y, theta) self.sliderMovedByPlayTimer = False def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print "play" self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print "pause" self.playTimer.stop() def computeRunStatistics(self): numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val["startIdx"] runDuration[idx] = val["duration"] discountedReward[idx] = val["discountedReward"] avgReward[idx] = val["avgReward"] avgRewardNoCollisionPenalty[idx] = val["avgRewardNoCollisionPenalty"] controllerType = val["controllerType"] idxMap[controllerType][idx] = True def plotRunData(self, controllerTypeToPlot=None, showPlot=True, onlyLearned=False): if controllerTypeToPlot == None: controllerTypeToPlot = self.colorMap.keys() if onlyLearned: controllerTypeToPlot = ["learnedRandom", "learned"] numRuns = len(self.simulationData) runStart = np.zeros(numRuns) runDuration = np.zeros(numRuns) grid = np.arange(1, numRuns + 1) discountedReward = np.zeros(numRuns) avgReward = np.zeros(numRuns) avgRewardNoCollisionPenalty = np.zeros(numRuns) idxMap = dict() for controllerType, color in self.colorMap.iteritems(): idxMap[controllerType] = np.zeros(numRuns, dtype=bool) for idx, val in enumerate(self.simulationData): runStart[idx] = val["startIdx"] runDuration[idx] = val["duration"] discountedReward[idx] = val["discountedReward"] avgReward[idx] = val["avgReward"] avgRewardNoCollisionPenalty[idx] = val["avgRewardNoCollisionPenalty"] controllerType = val["controllerType"] idxMap[controllerType][idx] = True # usedQValueController[idx] = (val['controllerType'] == "QValue") # usedDefaultController[idx] = (val['controllerType'] == "default") # usedDefaultRandomController[idx] = (val['controllerType'] == "defaultRandom") # usedQValueRandomController[idx] = (val['controllerType'] == "QValueRandom") self.runStatistics = dict() dataMap = { "duration": runDuration, "discountedReward": discountedReward, "avgReward": avgReward, "avgRewardNoCollisionPenalty": avgRewardNoCollisionPenalty, } def computeRunStatistics(dataMap): for controllerType, idx in idxMap.iteritems(): d = dict() for dataName, dataSet in dataMap.iteritems(): # average the appropriate values in dataset d[dataName] = np.sum(dataSet[idx]) / (1.0 * np.size(dataSet[idx])) self.runStatistics[controllerType] = d computeRunStatistics(dataMap) if not showPlot: return plt.figure() # # idxDefaultRandom = np.where(usedDefaultRandomController==True)[0] # idxQValueController = np.where(usedQValueController==True)[0] # idxQValueControllerRandom = np.where(usedQValueControllerRandom==True)[0] # idxDefault = np.where(usedDefaultController==True)[0] # # plotData = dict() # plotData['defaultRandom'] = {'idx': idxDefaultRandom, 'color': 'b'} # plotData['QValue'] = {'idx': idxQValueController, 'color': 'y'} # plotData['default'] = {'idx': idxDefault, 'color': 'g'} def scatterPlot(dataToPlot): for controllerType in controllerTypeToPlot: idx = idxMap[controllerType] plt.scatter(grid[idx], dataToPlot[idx], color=self.colorMap[controllerType]) def barPlot(dataName): plt.title(dataName) barWidth = 0.5 barCounter = 0 index = np.arange(len(controllerTypeToPlot)) for controllerType in controllerTypeToPlot: val = self.runStatistics[controllerType] plt.bar(barCounter, val[dataName], barWidth, color=self.colorMap[controllerType], label=controllerType) barCounter += 1 plt.xticks(index + barWidth / 2.0, controllerTypeToPlot) plt.subplot(4, 1, 1) plt.title("run duration") scatterPlot(runDuration) # for controllerType, idx in idxMap.iteritems(): # plt.scatter(grid[idx], runDuration[idx], color=self.colorMap[controllerType]) # plt.scatter(runStart[idxDefaultRandom], runDuration[idxDefaultRandom], color='b') # plt.scatter(runStart[idxQValueController], runDuration[idxQValueController], color='y') # plt.scatter(runStart[idxDefault], runDuration[idxDefault], color='g') plt.xlabel("run #") plt.ylabel("episode duration") plt.subplot(2, 1, 2) plt.title("discounted reward") scatterPlot(discountedReward) # for key, val in plotData.iteritems(): # plt.scatter(grid[idx], discountedReward[idx], color=self.colorMap[controllerType]) # plt.subplot(3,1,3) # plt.title("average reward") # scatterPlot(avgReward) # for key, val in plotData.iteritems(): # plt.scatter(grid[val['idx']],avgReward[val['idx']], color=val['color']) # plt.subplot(4,1,4) # plt.title("average reward no collision penalty") # scatterPlot(avgRewardNoCollisionPenalty) # # for key, val in plotData.iteritems(): # # plt.scatter(grid[val['idx']],avgRewardNoCollisionPenalty[val['idx']], color=val['color']) ## plot summary statistics plt.figure() plt.subplot(2, 1, 1) barPlot("duration") plt.subplot(2, 1, 2) barPlot("discountedReward") # plt.subplot(3,1,3) # barPlot("avgReward") # # plt.subplot(4,1,4) # barPlot("avgRewardNoCollisionPenalty") plt.show() def plotMultipleRunData(self, simList, toPlot=["duration", "discountedReward"], controllerType="learned"): plt.figure() numPlots = len(toPlot) grid = np.arange(len(simList)) def plot(fieldToPlot, plotNum): plt.subplot(numPlots, 1, plotNum) plt.title(fieldToPlot) val = 0 * grid barWidth = 0.5 barCounter = 0 for idx, sim in enumerate(simList): value = sim.runStatistics[controllerType][fieldToPlot] plt.bar(idx, value, barWidth) counter = 1 for fieldToPlot in toPlot: plot(fieldToPlot, counter) counter += 1 plt.show() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = "data/" + filename + ".out" my_shelf = shelve.open(filename, "n") my_shelf["options"] = self.options if self.options["SARSA"]["type"] == "discrete": my_shelf["SARSA_QValues"] = self.Sarsa.QValues my_shelf["simulationData"] = self.simulationData my_shelf["stateOverTime"] = self.stateOverTime my_shelf["raycastData"] = self.raycastData my_shelf["controlInputData"] = self.controlInputData my_shelf["emptyQValue"] = self.emptyQValue my_shelf["numTimesteps"] = self.numTimesteps my_shelf["idxDict"] = self.idxDict my_shelf["counter"] = self.counter my_shelf.close() @staticmethod def loadFromFile(filename): filename = "data/" + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf["options"] sim.initialize() if sim.options["SARSA"]["type"] == "discrete": sim.Sarsa.QValues = np.array(my_shelf["SARSA_QValues"]) sim.simulationData = my_shelf["simulationData"] # sim.runStatistics = my_shelf['runStatistics'] sim.stateOverTime = np.array(my_shelf["stateOverTime"]) sim.raycastData = np.array(my_shelf["raycastData"]) sim.controlInputData = np.array(my_shelf["controlInputData"]) sim.emptyQValue = np.array(my_shelf["emptyQValue"]) sim.numTimesteps = my_shelf["numTimesteps"] sim.idxDict = my_shelf["idxDict"] sim.counter = my_shelf["counter"] my_shelf.close() return sim
class Simulator(object): def __init__(self, percentObsDensity=20, endTime=40, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, autoInitialize=True, verbose=True): self.verbose = verbose self.startSimTime = time.time() self.collisionThreshold = 0.4 self.randomSeed = 5 self.Sensor_rayLength = 8 self.percentObsDensity = percentObsDensity self.defaultControllerTime = 1000 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() if autoInitialize: self.initialize() def initializeOptions(self): self.options = dict() self.options['World'] = dict() self.options['World']['obstaclesInnerFraction'] = 0.98 self.options['World']['randomSeed'] = 40 self.options['World']['percentObsDensity'] = 10 self.options['World']['nonRandomWorld'] = True self.options['World']['circleRadius'] = 1.0 self.options['World']['scale'] = 1 self.options['Sensor'] = dict() self.options['Sensor']['rayLength'] = 20 self.options['Sensor']['numRays'] = 21 self.options['Car'] = dict() self.options['Car']['velocity'] = 4.0 self.options['dt'] = 0.05 self.options['runTime'] = dict() self.options['runTime']['defaultControllerTime'] = 100 def setDefaultOptions(self): defaultOptions = dict() defaultOptions['World'] = dict() defaultOptions['World']['obstaclesInnerFraction'] = 0.98 defaultOptions['World']['randomSeed'] = 40 defaultOptions['World']['percentObsDensity'] = 30 defaultOptions['World']['nonRandomWorld'] = True defaultOptions['World']['circleRadius'] = 1.75 defaultOptions['World']['scale'] = 2.5 defaultOptions['Sensor'] = dict() defaultOptions['Sensor']['rayLength'] = 20 defaultOptions['Sensor']['numRays'] = 41 defaultOptions['Car'] = dict() defaultOptions['Car']['velocity'] = 20 defaultOptions['dt'] = 0.05 defaultOptions['runTime'] = dict() defaultOptions['runTime']['defaultControllerTime'] = 100 for k in defaultOptions: self.options.setdefault(k, defaultOptions[k]) for k in defaultOptions: if not isinstance(defaultOptions[k], dict): continue for j in defaultOptions[k]: self.options[k].setdefault(j, defaultOptions[k][j]) def initializeColorMap(self): self.colorMap = dict() self.colorMap['default'] = [0,1,0] def initialize(self): self.dt = self.options['dt'] self.controllerTypeOrder = ['default'] self.setDefaultOptions() self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'], numRays=self.options['Sensor']['numRays']) self.SensorApproximator = SensorApproximatorObj(numRays=self.options['Sensor']['numRays'], circleRadius=self.options['World']['circleRadius'], ) self.Controller = ControllerObj(self.Sensor, self.SensorApproximator) self.Car = CarPlant(controller=self.Controller, velocity=self.options['Car']['velocity']) self.Controller.initializeVelocity(self.Car.v) # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName('world')) self.world = World.buildLineSegmentTestWorld(percentObsDensity=self.options['World']['percentObsDensity'], circleRadius=self.options['World']['circleRadius'], nonRandom=self.options['World']['nonRandomWorld'], scale=self.options['World']['scale'], randomSeed=self.options['World']['randomSeed'], obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction']) om.removeFromObjectModel(om.findObjectByName('robot')) self.robot, self.frame = World.buildRobot() self.locator = World.buildCellLocator(self.world.visObj.polyData) self.Sensor.setLocator(self.locator) self.frame = self.robot.getChildFrame() self.frame.setProperty('Scale', 3) #self.frame.setProperty('Visible', False) #self.frame.setProperty('Edit', True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.defaultControllerTime = self.options['runTime']['defaultControllerTime'] self.Car.setFrame(self.frame) print "Finished initialization" def runSingleSimulation(self, controllerType='default', simulationCutoff=None): #self.setRandomCollisionFreeInitialState() self.setInitialStateAtZero() currentCarState = np.copy(self.Car.state) nextCarState = np.copy(self.Car.state) self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2]) firstRaycast = self.Sensor.raycastAll(self.frame) firstRaycastLocations = self.Sensor.raycastAllLocations(self.frame) self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations) self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData) self.Sensor.setLocator(self.LineSegmentLocator) nextRaycast = np.zeros(self.Sensor.numRays) # record the reward data runData = dict() startIdx = self.counter thisRunIndex = 0 NMaxSteps = 100 while (self.counter < self.numTimesteps - 1): thisRunIndex += 1 idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx,:] = currentCarState x = self.stateOverTime[idx,0] y = self.stateOverTime[idx,1] theta = self.stateOverTime[idx,2] self.setRobotFrameState(x,y,theta) # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) print "current Raycast ", currentRaycast self.raycastData[idx,:] = currentRaycast S_current = (currentCarState, currentRaycast) if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") if controllerType in ["default", "defaultRandom"]: controlInput, controlInputIdx = self.Controller.computeControlInput(currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False) self.controlInputData[idx] = controlInput nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt) x = nextCarState[0] y = nextCarState[1] theta = nextCarState[2] self.setRobotFrameState(x,y,theta) nextRaycast = self.Sensor.raycastAll(self.frame) # Compute the next control input S_next = (nextCarState, nextRaycast) if controllerType in ["default", "defaultRandom"]: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState, currentTime, self.frame, raycastDistance=firstRaycast, randomize=False) #bookkeeping currentCarState = nextCarState currentRaycast = nextRaycast self.counter+=1 # break if we are in collision if self.checkInCollision(nextRaycast): if self.verbose: print "Had a collision, terminating simulation" break if thisRunIndex > NMaxSteps: print "was safe during N steps" break if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter,:] = currentCarState self.raycastData[self.counter,:] = currentRaycast # this just makes sure we don't get stuck in an infinite loop. if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): # for use in playback self.dt = self.options['dt'] self.endTime = self.defaultControllerTime # used to be the sum of the other times as well self.t = np.arange(0.0, self.endTime, dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps+1, 3)) self.raycastData = np.zeros((maxNumTimesteps+1, self.Sensor.numRays)) self.controlInputData = np.zeros(maxNumTimesteps+1) self.numTimesteps = maxNumTimesteps self.controllerTypeOrder = ['default'] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 self.idxDict['default'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime/dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.defaultControllerTime/dt) and self.counter < self.numTimesteps-1): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(controllerType='default', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "default" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter+=1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0:self.counter+1, :] self.raycastData = self.raycastData[0:self.counter+1, :] self.controlInputData = self.controlInputData[0:self.counter+1] self.endTime = 1.0*self.counter/self.numTimesteps*self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * (self.numTicks - self.nextTickIdx), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = 0.0 y = -5.0 theta = 0 #+ np.random.uniform(0,2*np.pi,1)[0] * 0.01 self.Car.setCarState(x,y,theta) self.setRobotFrameState(x,y,theta) print "In loop" if not self.checkInCollision(): break return x,y,theta def setInitialStateAtZero(self): x = 0.0 y = 0.0 theta = 0.0 self.Car.setCarState(x,y,theta) self.setRobotFrameState(x,y,theta) return x,y,theta def setRandomCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin+tol, self.world.Xmax-tol, 1)[0] y = np.random.uniform(self.world.Ymin+tol, self.world.Ymax-tol, 1)[0] theta = np.random.uniform(0,2*np.pi,1)[0] self.Car.setCarState(x,y,theta) self.setRobotFrameState(x,y,theta) if not self.checkInCollision(): break return x,y,theta def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0/self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) playButton = QtGui.QPushButton('Play/Pause') playButton.connect('clicked()', self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect('valueChanged(int)', self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider l.addWidget(playButton) l.addWidget(slider) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) self.view.orientationMarkerWidget().Off() l.addWidget(panel) w.showMaximized() self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2,0,-1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() cameracontrolpanel.CameraControlPanel(self.view).widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter/elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.app.start() def run(self, launchApp=True): self.counter = 1 self.runBatchSimulation() if launchApp: self.setupPlayback() def updateDrawIntersection(self, frame): origin = np.array(frame.transform.GetPosition()) #print "origin is now at", origin d = DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:,i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) #print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast(self.LineSegmentLocator, origin, origin + rayTransformed*self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1,0,0]) else: d.addLine(origin, origin+rayTransformed*self.Sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255') #camera = self.view.camera() #camera.SetFocalPoint(frame.transform.GetPosition()) #camera.SetPosition(frame.transform.TransformPoint((-30,0,10))) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x,y,0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, raycastDistance=None): if raycastDistance is None: self.setRobotFrameState(self.Car.state[0],self.Car.state[1],self.Car.state[2]) raycastDistance = self.Sensor.raycastAll(self.frame) if np.min(raycastDistance) < self.collisionThreshold: return True else: return False # if raycastDistance[(len(raycastDistance)+1)/2] < self.collisionThreshold: # return True # else: # return False def tick(self): #print timer.elapsed #simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x,y,0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x,y,0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps*(1.0*value/self.sliderMax))) idx = min(idx, numSteps-1) x,y,theta = self.stateOverTime[idx] self.setRobotFrameState(x,y,theta) self.sliderMovedByPlayTimer = False def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print 'play' self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print 'pause' self.playTimer.stop() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = 'data/' + filename + ".out" my_shelf = shelve.open(filename,'n') my_shelf['options'] = self.options my_shelf['simulationData'] = self.simulationData my_shelf['stateOverTime'] = self.stateOverTime my_shelf['raycastData'] = self.raycastData my_shelf['controlInputData'] = self.controlInputData my_shelf['numTimesteps'] = self.numTimesteps my_shelf['idxDict'] = self.idxDict my_shelf['counter'] = self.counter my_shelf.close() @staticmethod def loadFromFile(filename): filename = 'data/' + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf['options'] sim.initialize() sim.simulationData = my_shelf['simulationData'] sim.stateOverTime = np.array(my_shelf['stateOverTime']) sim.raycastData = np.array( my_shelf['raycastData']) sim.controlInputData = np.array(my_shelf['controlInputData']) sim.numTimesteps = my_shelf['numTimesteps'] sim.idxDict = my_shelf['idxDict'] sim.counter = my_shelf['counter'] my_shelf.close() return sim
class Simulator(object): def __init__(self, percentObsDensity=20, endTime=40, nonRandomWorld=False, circleRadius=0.7, worldScale=1.0, autoInitialize=True, verbose=True): self.verbose = verbose self.startSimTime = time.time() self.collisionThreshold = 1.3 self.randomSeed = 5 self.Sensor_rayLength = 8 self.percentObsDensity = percentObsDensity self.defaultControllerTime = 1000 self.nonRandomWorld = nonRandomWorld self.circleRadius = circleRadius self.worldScale = worldScale # create the visualizer object self.app = ConsoleApp() self.view = self.app.createView(useGrid=False) self.initializeOptions() self.initializeColorMap() if autoInitialize: self.initialize() def initializeOptions(self): self.options = dict() self.options['World'] = dict() self.options['World']['obstaclesInnerFraction'] = 0.85 self.options['World']['randomSeed'] = 40 self.options['World']['percentObsDensity'] = 7.5 self.options['World']['nonRandomWorld'] = True self.options['World']['circleRadius'] = 1.0 self.options['World']['scale'] = 1.0 self.options['Sensor'] = dict() self.options['Sensor']['rayLength'] = 10 self.options['Sensor']['numRays'] = 20 self.options['Quad'] = dict() self.options['Quad']['velocity'] = 18 self.options['dt'] = 0.05 self.options['runTime'] = dict() self.options['runTime']['defaultControllerTime'] = 10 def setDefaultOptions(self): defaultOptions = dict() defaultOptions['World'] = dict() defaultOptions['World']['obstaclesInnerFraction'] = 0.85 defaultOptions['World']['randomSeed'] = 40 defaultOptions['World']['percentObsDensity'] = 7.5 defaultOptions['World']['nonRandomWorld'] = True defaultOptions['World']['circleRadius'] = 1.75 defaultOptions['World']['scale'] = 1.0 defaultOptions['Sensor'] = dict() defaultOptions['Sensor']['rayLength'] = 10 defaultOptions['Sensor']['numRays'] = 20 defaultOptions['Quad'] = dict() defaultOptions['Quad']['velocity'] = 18 defaultOptions['dt'] = 0.05 defaultOptions['runTime'] = dict() defaultOptions['runTime']['defaultControllerTime'] = 10 for k in defaultOptions: self.options.setdefault(k, defaultOptions[k]) for k in defaultOptions: if not isinstance(defaultOptions[k], dict): continue for j in defaultOptions[k]: self.options[k].setdefault(j, defaultOptions[k][j]) def initializeColorMap(self): self.colorMap = dict() self.colorMap['default'] = [0, 1, 0] def initialize(self): self.dt = self.options['dt'] self.controllerTypeOrder = ['default'] self.setDefaultOptions() self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'], numRays=self.options['Sensor']['numRays']) self.Controller = ControllerObj(self.Sensor) self.Quad = QuadPlant(controller=self.Controller, velocity=self.options['Quad']['velocity']) # create the things needed for simulation om.removeFromObjectModel(om.findObjectByName('world')) self.world = World.buildWarehouseWorld( percentObsDensity=self.options['World']['percentObsDensity'], circleRadius=self.options['World']['circleRadius'], nonRandom=self.options['World']['nonRandomWorld'], scale=self.options['World']['scale'], randomSeed=self.options['World']['randomSeed'], obstaclesInnerFraction=self.options['World'] ['obstaclesInnerFraction']) om.removeFromObjectModel(om.findObjectByName('robot')) self.robot, self.frame = World.buildRobot() self.locator = World.buildCellLocator(self.world.visObj.polyData) self.Sensor.setLocator(self.locator) self.frame = self.robot.getChildFrame() self.frame.setProperty('Scale', 3) self.frame.setProperty('Edit', True) self.frame.widget.HandleRotationEnabledOff() rep = self.frame.widget.GetRepresentation() rep.SetTranslateAxisEnabled(2, False) rep.SetRotateAxisEnabled(0, False) rep.SetRotateAxisEnabled(1, False) self.defaultControllerTime = self.options['runTime'][ 'defaultControllerTime'] self.Quad.setFrame(self.frame) print "Finished initialization" def runSingleSimulation(self, controllerType='default', simulationCutoff=None): self.setCollisionFreeInitialState() currentQuadState = np.copy(self.Quad.state) nextQuadState = np.copy(self.Quad.state) self.setRobotFrameState(currentQuadState[0], currentQuadState[1], currentQuadState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) nextRaycast = np.zeros(self.Sensor.numRays) # record the reward data runData = dict() startIdx = self.counter while (self.counter < self.numTimesteps - 1): idx = self.counter currentTime = self.t[idx] self.stateOverTime[idx, :] = currentQuadState x = self.stateOverTime[idx, 0] y = self.stateOverTime[idx, 1] theta = self.stateOverTime[idx, 2] self.setRobotFrameState(x, y, theta) # self.setRobotState(currentQuadState[0], currentQuadState[1], currentQuadState[2]) currentRaycast = self.Sensor.raycastAll(self.frame) self.raycastData[idx, :] = currentRaycast S_current = (currentQuadState, currentRaycast) if controllerType not in self.colorMap.keys(): print raise ValueError("controller of type " + controllerType + " not supported") if controllerType in ["default", "defaultRandom"]: controlInput, controlInputIdx = self.Controller.computeControlInput( currentQuadState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False) self.controlInputData[idx] = controlInput nextQuadState = self.Quad.simulateOneStep( controlInput=controlInput, dt=self.dt) # want to compute nextRaycast so we can do the SARSA algorithm x = nextQuadState[0] y = nextQuadState[1] theta = nextQuadState[2] self.setRobotFrameState(x, y, theta) nextRaycast = self.Sensor.raycastAll(self.frame) # Compute the next control input S_next = (nextQuadState, nextRaycast) if controllerType in ["default", "defaultRandom"]: nextControlInput, nextControlInputIdx = self.Controller.computeControlInput( nextQuadState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False) #bookkeeping currentQuadState = nextQuadState currentRaycast = nextRaycast self.counter += 1 # break if we are in collision if self.checkInCollision(nextRaycast): if self.verbose: print "Had a collision, terminating simulation" break if self.counter >= simulationCutoff: break # fill in the last state by hand self.stateOverTime[self.counter, :] = currentQuadState self.raycastData[self.counter, :] = currentRaycast # this just makes sure we don't get stuck in an infinite loop. if startIdx == self.counter: self.counter += 1 return runData def setNumpyRandomSeed(self, seed=1): np.random.seed(seed) def runBatchSimulation(self, endTime=None, dt=0.05): # for use in playback self.dt = self.options['dt'] self.endTime = self.defaultControllerTime # used to be the sum of the other times as well self.t = np.arange(0.0, self.endTime, dt) maxNumTimesteps = np.size(self.t) self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3)) self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays)) self.controlInputData = np.zeros(maxNumTimesteps + 1) self.numTimesteps = maxNumTimesteps self.controllerTypeOrder = ['default'] self.counter = 0 self.simulationData = [] self.initializeStatusBar() self.idxDict = dict() numRunsCounter = 0 self.idxDict['default'] = self.counter loopStartIdx = self.counter simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps) while ((self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1): self.printStatusBar() startIdx = self.counter runData = self.runSingleSimulation(controllerType='default', simulationCutoff=simCutoff) runData['startIdx'] = startIdx runData['controllerType'] = "default" runData['duration'] = self.counter - runData['startIdx'] runData['endIdx'] = self.counter runData['runNumber'] = numRunsCounter numRunsCounter += 1 self.simulationData.append(runData) # BOOKKEEPING # truncate stateOverTime, raycastData, controlInputs to be the correct size self.numTimesteps = self.counter + 1 self.stateOverTime = self.stateOverTime[0:self.counter + 1, :] self.raycastData = self.raycastData[0:self.counter + 1, :] self.controlInputData = self.controlInputData[0:self.counter + 1] self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime def initializeStatusBar(self): self.numTicks = 10 self.nextTickComplete = 1.0 / float(self.numTicks) self.nextTickIdx = 1 print "Simulation percentage complete: (", self.numTicks, " # is complete)" def printStatusBar(self): fractionDone = float(self.counter) / float(self.numTimesteps) if fractionDone > self.nextTickComplete: self.nextTickIdx += 1 self.nextTickComplete += 1.0 / self.numTicks timeSoFar = time.time() - self.startSimTime estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0 print "#" * self.nextTickIdx, "-" * ( self.numTicks - self.nextTickIdx ), "estimated", estimatedTimeLeft_minutes, "minutes left" def setCollisionFreeInitialState(self): tol = 5 while True: x = 0.0 y = -5.0 theta = 0 #+ np.random.uniform(0,2*np.pi,1)[0] * 0.01 self.Quad.setQuadState(x, y, theta) self.setRobotFrameState(x, y, theta) print "In loop" if not self.checkInCollision(): break return x, y, theta def setRandomCollisionFreeInitialState(self): tol = 5 while True: x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0] y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0] theta = np.random.uniform(0, 2 * np.pi, 1)[0] self.Quad.setQuadState(x, y, theta) self.setRobotFrameState(x, y, theta) if not self.checkInCollision(): break return x, y, theta def setupPlayback(self): self.timer = TimerCallback(targetFps=30) self.timer.callback = self.tick playButtonFps = 1.0 / self.dt print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback self.sliderMovedByPlayTimer = False panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) playButton = QtGui.QPushButton('Play/Pause') playButton.connect('clicked()', self.onPlayButton) slider = QtGui.QSlider(QtCore.Qt.Horizontal) slider.connect('valueChanged(int)', self.onSliderChanged) self.sliderMax = self.numTimesteps slider.setMaximum(self.sliderMax) self.slider = slider l.addWidget(playButton) l.addWidget(slider) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) l.addWidget(panel) w.showMaximized() self.frame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.frame) applogic.resetCamera(viewDirection=[0.2, 0, -1]) self.view.showMaximized() self.view.raise_() panel = screengrabberpanel.ScreenGrabberPanel(self.view) panel.widget.show() elapsed = time.time() - self.startSimTime simRate = self.counter / elapsed print "Total run time", elapsed print "Ticks (Hz)", simRate print "Number of steps taken", self.counter self.app.start() def run(self, launchApp=True): self.counter = 1 self.runBatchSimulation() # self.Sarsa.plotWeights() if launchApp: self.setupPlayback() def updateDrawIntersection(self, frame): origin = np.array(frame.transform.GetPosition()) #print "origin is now at", origin d = DebugData() sliderIdx = self.slider.value controllerType = self.getControllerTypeFromCounter(sliderIdx) colorMaxRange = self.colorMap[controllerType] for j in xrange(self.Sensor.numLayers): for i in xrange(self.Sensor.numRays): ray = self.Sensor.rays[:, i, j] rayTransformed = np.array(frame.transform.TransformNormal(ray)) #print "rayTransformed is", rayTransformed intersection = self.Sensor.raycast( self.locator, origin, origin + rayTransformed * self.Sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1, 0, 0]) else: d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255') #camera = self.view.camera() #camera.SetFocalPoint(frame.transform.GetPosition()) #camera.SetPosition(frame.transform.TransformPoint((-30,0,10))) def getControllerTypeFromCounter(self, counter): name = self.controllerTypeOrder[0] for controllerType in self.controllerTypeOrder[1:]: if counter >= self.idxDict[controllerType]: name = controllerType return name def setRobotFrameState(self, x, y, theta): t = vtk.vtkTransform() t.Translate(x, y, 0.0) t.RotateZ(np.degrees(theta)) self.robot.getChildFrame().copyFrame(t) # returns true if we are in collision def checkInCollision(self, raycastDistance=None): if raycastDistance is None: self.setRobotFrameState(self.Quad.state[0], self.Quad.state[1], self.Quad.state[2]) raycastDistance = self.Sensor.raycastAll(self.frame) if np.min(raycastDistance) < self.collisionThreshold: return True else: return False def tick(self): #print timer.elapsed #simulate(t.elapsed) x = np.sin(time.time()) y = np.cos(time.time()) self.setRobotFrameState(x, y, 0.0) if (time.time() - self.playTime) > self.endTime: self.playTimer.stop() def tick2(self): newtime = time.time() - self.playTime print time.time() - self.playTime x = np.sin(newtime) y = np.cos(newtime) self.setRobotFrameState(x, y, 0.0) # just increment the slider, stop the timer if we get to the end def playTimerCallback(self): self.sliderMovedByPlayTimer = True currentIdx = self.slider.value nextIdx = currentIdx + 1 self.slider.setSliderPosition(nextIdx) if currentIdx >= self.sliderMax: print "reached end of tape, stopping playTime" self.playTimer.stop() def onSliderChanged(self, value): if not self.sliderMovedByPlayTimer: self.playTimer.stop() numSteps = len(self.stateOverTime) idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax))) idx = min(idx, numSteps - 1) x, y, theta = self.stateOverTime[idx] self.setRobotFrameState(x, y, theta) self.sliderMovedByPlayTimer = False def onPlayButton(self): if self.playTimer.isActive(): self.onPauseButton() return print 'play' self.playTimer.start() self.playTime = time.time() def onPauseButton(self): print 'pause' self.playTimer.stop() def saveToFile(self, filename): # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime filename = 'data/' + filename + ".out" my_shelf = shelve.open(filename, 'n') my_shelf['options'] = self.options my_shelf['simulationData'] = self.simulationData my_shelf['stateOverTime'] = self.stateOverTime my_shelf['raycastData'] = self.raycastData my_shelf['controlInputData'] = self.controlInputData my_shelf['numTimesteps'] = self.numTimesteps my_shelf['idxDict'] = self.idxDict my_shelf['counter'] = self.counter my_shelf.close() @staticmethod def loadFromFile(filename): filename = 'data/' + filename + ".out" sim = Simulator(autoInitialize=False, verbose=False) my_shelf = shelve.open(filename) sim.options = my_shelf['options'] sim.initialize() sim.simulationData = my_shelf['simulationData'] sim.stateOverTime = np.array(my_shelf['stateOverTime']) sim.raycastData = np.array(my_shelf['raycastData']) sim.controlInputData = np.array(my_shelf['controlInputData']) sim.numTimesteps = my_shelf['numTimesteps'] sim.idxDict = my_shelf['idxDict'] sim.counter = my_shelf['counter'] my_shelf.close() return sim
class SimpleSimulator(object): def __init__(self): self.setDefaultOptions() self.initialize() def setDefaultOptions(self): defaultOptions = dict() defaultOptions['Sensor'] = dict() defaultOptions['Sensor']['rayLength'] = 20 defaultOptions['Sensor']['numRays'] = 100 defaultOptions['Sensor']['FOV'] = 270 defaultOptions['Sim'] = dict() defaultOptions['Sim']['dt'] = 0.05 defaultOptions['Sim']['usePursuitController'] = True defaultOptions['featureDetector'] = {} defaultOptions['featureDetector']['detectCorners'] = True self.options = defaultOptions self.app = ConsoleApp() self.view = self.app.createView(useGrid=False) def initialize(self): self.car = DubinsCar() self.sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'], numRays=self.options['Sensor']['numRays'], FOV=self.options['Sensor']['FOV']) self.controller = DubinsPIDController(self.sensor, self.car) self.world = World.buildDonutWorld() self.locator = World.buildCellLocator(self.world.visObj.polyData) self.sensor.setLocator(self.locator) self.robot, self.carFrame = World.buildRobot() self.car.setFrame(self.carFrame) self.locator = World.buildCellLocator(self.world.visObj.polyData) self.wallDetector = WallDetector(self.sensor) self.planner = SimplePlanner(self.sensor) self.pursuitController = PurePursuitController(self.car, self.sensor) self.featureDetector = FeatureDetector(self.sensor) def updateDrawIntersection(self, frame): origin = np.array(frame.transform.GetPosition()) #print "origin is now at", origin d = DebugData() colorMaxRange = [0,1,0] for i in xrange(self.sensor.numRays): ray = self.sensor.rays[:,i] rayTransformed = np.array(frame.transform.TransformNormal(ray)) #print "rayTransformed is", rayTransformed intersection = self.sensor.raycast(self.locator, origin, origin + rayTransformed*self.sensor.rayLength) if intersection is not None: d.addLine(origin, intersection, color=[1,0,0]) else: d.addLine(origin, origin+rayTransformed*self.sensor.rayLength, color=colorMaxRange) vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255') def playTimerCallback(self): self.simulateOneStep(self.options['Sim']['dt']) def simulateOneStep(self, dt): verbose = True raycastData = self.sensor.raycastAllFromCurrentFrameLocation() if self.options['Sim']['usePursuitController']: L_fw, theta = self.planner.computePursuitPoint(raycastData, plot=True) carState = self.car.state controlInput = self.pursuitController.computeControlInput(carState, dt, L_fw, theta) if self.options['featureDetector']['detectCorners']: self.featureDetector.detectCorner(raycastData, plot=True, verbose=True) newState = self.car.simulateOneStep(controlInput, dt=dt) self.car.setFrameToState() def onPlayButton(self): if self.playTimer.isActive(): self.playTimer.stop() return self.car.resetStateToFrameLocation() self.playTimer.start() def launchViewer(self): playButtonFps = 1.0/self.options['Sim']['dt'] print "playButtonFPS", playButtonFps self.playTimer = TimerCallback(targetFps=playButtonFps) self.playTimer.callback = self.playTimerCallback panel = QtGui.QWidget() l = QtGui.QHBoxLayout(panel) playButton = QtGui.QPushButton('Play/Pause') playButton.connect('clicked()', self.onPlayButton) l.addWidget(playButton) w = QtGui.QWidget() l = QtGui.QVBoxLayout(w) l.addWidget(self.view) l.addWidget(panel) w.showMaximized() self.carFrame.connectFrameModified(self.updateDrawIntersection) self.updateDrawIntersection(self.carFrame) applogic.resetCamera(viewDirection=[0.2,0,-1]) self.view.showMaximized() self.view.raise_() self.app.start() def testCrossTrackError(self): carState = np.copy(self.car.state) carState[self.car.idx['v']] = 5.0 raycastData = self.sensor.raycastAllFromCurrentFrameLocation() wallsFound = self.wallDetector.findWalls(raycastData, addNoise=False) return self.controller.computeCrossTrackError(carState, wallsFound, verbose=True) def testPursuitController(self): L_fw, theta = self.planner.testFromCurrent() self.car.resetStateToFrameLocation() v_des = 5.0 carState = self.car.state dt = 0.05 u = self.pursuitController.computeControlInput(carState, dt, L_fw, theta, v_des, verbose=True) def testDetectFeature(self): raycastData = self.sensor.raycastAllFromCurrentFrameLocation() self.featureDetector.detectCorner(raycastData, plot=True, verbose=True)