Exemplo n.º 1
0
class Simulator(object):
    def __init__(
        self,
        percentObsDensity=20,
        endTime=40,
        nonRandomWorld=False,
        circleRadius=0.7,
        worldScale=1.0,
        autoInitialize=True,
        verbose=True,
    ):
        self.verbose = verbose
        self.startSimTime = time.time()
        self.collisionThreshold = 0.4
        self.randomSeed = 5
        self.Sensor_rayLength = 8

        self.percentObsDensity = percentObsDensity
        self.defaultControllerTime = 1000
        self.nonRandomWorld = nonRandomWorld
        self.circleRadius = circleRadius
        self.worldScale = worldScale

        # create the visualizer object
        self.app = ConsoleApp()
        self.view = self.app.createView(useGrid=False)

        self.initializeOptions()
        self.initializeColorMap()

        if autoInitialize:
            self.initialize()

        self.XVelocity_drawing = 0.0
        self.YVelocity_drawing = 0.0

    def initializeOptions(self):
        self.options = dict()

        self.options["World"] = dict()
        self.options["World"]["obstaclesInnerFraction"] = 0.98
        self.options["World"]["randomSeed"] = 40
        self.options["World"]["percentObsDensity"] = 0.0
        self.options["World"]["nonRandomWorld"] = True
        self.options["World"]["circleRadius"] = 1.0
        self.options["World"]["scale"] = 1

        self.options["Sensor"] = dict()
        self.options["Sensor"]["rayLength"] = 10
        self.options["Sensor"]["numRays"] = 50

        self.options["Car"] = dict()
        self.options["Car"]["velocity"] = 4.0

        self.options["dt"] = 0.05

        self.options["runTime"] = dict()
        self.options["runTime"]["defaultControllerTime"] = 100

    def setDefaultOptions(self):

        defaultOptions = dict()

        defaultOptions["World"] = dict()
        defaultOptions["World"]["obstaclesInnerFraction"] = 0.98
        defaultOptions["World"]["randomSeed"] = 40
        defaultOptions["World"]["percentObsDensity"] = 30
        defaultOptions["World"]["nonRandomWorld"] = True
        defaultOptions["World"]["circleRadius"] = 1.75
        defaultOptions["World"]["scale"] = 2.5

        defaultOptions["Sensor"] = dict()
        defaultOptions["Sensor"]["rayLength"] = 10
        defaultOptions["Sensor"]["numRays"] = 51

        defaultOptions["Car"] = dict()
        defaultOptions["Car"]["velocity"] = 20

        defaultOptions["dt"] = 0.05

        defaultOptions["runTime"] = dict()
        defaultOptions["runTime"]["defaultControllerTime"] = 100

        for k in defaultOptions:
            self.options.setdefault(k, defaultOptions[k])

        for k in defaultOptions:
            if not isinstance(defaultOptions[k], dict):
                continue

            for j in defaultOptions[k]:
                self.options[k].setdefault(j, defaultOptions[k][j])

    def initializeColorMap(self):
        self.colorMap = dict()
        self.colorMap["default"] = [0, 1, 0]

    def initialize(self):

        self.dt = self.options["dt"]
        self.controllerTypeOrder = ["default"]

        self.setDefaultOptions()

        self.Sensor = SensorObj(
            rayLength=self.options["Sensor"]["rayLength"], numRays=self.options["Sensor"]["numRays"]
        )

        self.SensorApproximator = SensorApproximatorObj(
            numRays=self.options["Sensor"]["numRays"], circleRadius=self.options["World"]["circleRadius"]
        )

        self.Controller = ControllerObj(self.Sensor, self.SensorApproximator)

        self.Car = CarPlant(controller=self.Controller, velocity=self.options["Car"]["velocity"])

        self.Controller.initializeVelocity(self.Car.v)

        self.ActionSet = ActionSetObj()

        # create the things needed for simulation
        om.removeFromObjectModel(om.findObjectByName("world"))
        self.world = World.buildLineSegmentTestWorld(
            percentObsDensity=self.options["World"]["percentObsDensity"],
            circleRadius=self.options["World"]["circleRadius"],
            nonRandom=self.options["World"]["nonRandomWorld"],
            scale=self.options["World"]["scale"],
            randomSeed=self.options["World"]["randomSeed"],
            obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"],
        )

        om.removeFromObjectModel(om.findObjectByName("robot"))
        self.robot, self.frame = World.buildRobot()

        self.frame = self.robot.getChildFrame()
        self.frame.setProperty("Scale", 3)
        # self.frame.setProperty('Visible', False)
        # self.frame.setProperty('Edit', True)
        self.frame.widget.HandleRotationEnabledOff()
        rep = self.frame.widget.GetRepresentation()
        rep.SetTranslateAxisEnabled(2, False)
        rep.SetRotateAxisEnabled(0, False)
        rep.SetRotateAxisEnabled(1, False)

        self.defaultControllerTime = self.options["runTime"]["defaultControllerTime"]

        self.Car.setFrame(self.frame)
        print "Finished initialization"

    def runSingleSimulation(self, controllerType="default", simulationCutoff=None):

        # self.setRandomCollisionFreeInitialState()
        self.setInitialStateAtZero()

        currentCarState = np.copy(self.Car.state)
        nextCarState = np.copy(self.Car.state)
        self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2])

        firstRaycast = self.Sensor.raycastAll(self.frame)
        firstRaycastLocations = self.Sensor.raycastAllLocations(self.frame)

        # self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations)
        # self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData)
        # self.Sensor.setLocator(self.LineSegmentLocator)

        nextRaycast = np.zeros(self.Sensor.numRays)

        # record the reward data
        runData = dict()
        startIdx = self.counter

        thisRunIndex = 0
        NMaxSteps = 100

        while self.counter < self.numTimesteps - 1:
            thisRunIndex += 1
            idx = self.counter
            currentTime = self.t[idx]
            self.stateOverTime[idx, :] = currentCarState
            x = self.stateOverTime[idx, 0]
            y = self.stateOverTime[idx, 1]

            self.setRobotFrameState(x, y, 0.0)
            # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2])
            currentRaycast = self.Sensor.raycastAll(self.frame)
            self.raycastData[idx, :] = currentRaycast
            S_current = (currentCarState, currentRaycast)

            if controllerType not in self.colorMap.keys():
                print
                raise ValueError("controller of type " + controllerType + " not supported")

            if controllerType in ["default", "defaultRandom"]:
                controlInput, controlInputIdx = self.Controller.computeControlInput(
                    currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False
                )

            self.controlInputData[idx] = controlInput

            nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt)
            print "NEXTCARSTATE is ", nextCarState

            x = nextCarState[0]
            y = nextCarState[1]

            self.setRobotFrameState(x, y, 0.0)
            nextRaycast = self.Sensor.raycastAll(self.frame)

            # Compute the next control input
            S_next = (nextCarState, nextRaycast)

            if controllerType in ["default", "defaultRandom"]:
                nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                    nextCarState, currentTime, self.frame, raycastDistance=firstRaycast, randomize=False
                )

            # bookkeeping
            currentCarState = nextCarState
            currentRaycast = nextRaycast
            self.counter += 1

            # break if we are in collision
            if self.checkInCollision(nextRaycast):
                if self.verbose:
                    print "Had a collision, terminating simulation"
                break

            if thisRunIndex > NMaxSteps:
                print "was safe during N steps"
                break

            if self.counter >= simulationCutoff:
                break

        # fill in the last state by hand
        self.stateOverTime[self.counter, :] = currentCarState
        self.raycastData[self.counter, :] = currentRaycast

        # this just makes sure we don't get stuck in an infinite loop.
        if startIdx == self.counter:
            self.counter += 1

        return runData

    def setNumpyRandomSeed(self, seed=1):
        np.random.seed(seed)

    def runBatchSimulation(self, endTime=None, dt=0.05):

        self.controllerTypeOrder = ["default"]
        self.counter = 0
        self.simulationData = []

        self.initializeStatusBar()

        self.idxDict = dict()
        numRunsCounter = 0

        self.idxDict["default"] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps)

        while (self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1:
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(controllerType="default", simulationCutoff=simCutoff)
            runData["startIdx"] = startIdx
            runData["controllerType"] = "default"
            runData["duration"] = self.counter - runData["startIdx"]
            runData["endIdx"] = self.counter
            runData["runNumber"] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        # BOOKKEEPING
        # truncate stateOverTime, raycastData, controlInputs to be the correct size
        self.numTimesteps = self.counter + 1
        self.stateOverTime = self.stateOverTime[0 : self.counter + 1, :]
        self.raycastData = self.raycastData[0 : self.counter + 1, :]
        self.controlInputData = self.controlInputData[0 : self.counter + 1]
        self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime

    def initializeStatusBar(self):
        self.numTicks = 10
        self.nextTickComplete = 1.0 / float(self.numTicks)
        self.nextTickIdx = 1
        print "Simulation percentage complete: (", self.numTicks, " # is complete)"

    def printStatusBar(self):
        fractionDone = float(self.counter) / float(self.numTimesteps)
        if fractionDone > self.nextTickComplete:

            self.nextTickIdx += 1
            self.nextTickComplete += 1.0 / self.numTicks

            timeSoFar = time.time() - self.startSimTime
            estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone
            estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0

            print "#" * self.nextTickIdx, "-" * (
                self.numTicks - self.nextTickIdx
            ), "estimated", estimatedTimeLeft_minutes, "minutes left"

    def setCollisionFreeInitialState(self):
        tol = 5

        while True:

            x = 0.0
            y = -5.0
            theta = 0  # + np.random.uniform(0,2*np.pi,1)[0] * 0.01

            self.Car.setCarState(x, y, 0.0, 0.0)
            self.setRobotFrameState(x, y, theta)

            print "In loop"

            if not self.checkInCollision():
                break

        return x, y, theta

    def setInitialStateAtZero(self):

        x = 0.0
        y = 0.0
        theta = 0.0

        self.Car.setCarState(x, y, 0.0, 0.0)
        self.setRobotFrameState(x, y, theta)

        return x, y, theta

    def setRandomCollisionFreeInitialState(self):
        tol = 5

        while True:

            x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0]
            y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0]
            theta = np.random.uniform(0, 2 * np.pi, 1)[0]

            self.Car.setCarState(x, y, theta)
            self.setRobotFrameState(x, y, theta)

            if not self.checkInCollision():
                break

        return x, y, theta

    def setupPlayback(self):

        self.timer = TimerCallback(targetFps=30)
        self.timer.callback = self.tick

        playButtonFps = 1.0 / self.dt
        print "playButtonFPS", playButtonFps
        self.playTimer = TimerCallback(targetFps=playButtonFps)
        self.playTimer.callback = self.playTimerCallback
        self.sliderMovedByPlayTimer = False

        panel = QtGui.QWidget()
        l = QtGui.QHBoxLayout(panel)

        self.max_velocity = 20.0

        sliderXVelocity = QtGui.QSlider(QtCore.Qt.Horizontal)
        sliderXVelocity.connect("valueChanged(int)", self.onXVelocityChanged)
        sliderXVelocity.setMaximum(self.max_velocity)
        sliderXVelocity.setMinimum(-self.max_velocity)

        l.addWidget(sliderXVelocity)

        sliderYVelocity = QtGui.QSlider(QtCore.Qt.Horizontal)
        sliderYVelocity.connect("valueChanged(int)", self.onYVelocityChanged)
        sliderYVelocity.setMaximum(self.max_velocity)
        sliderYVelocity.setMinimum(-self.max_velocity)
        l.addWidget(sliderYVelocity)

        firstRaycast = np.ones((self.Sensor.numRays, 1)) * 10.0 + np.random.randn(self.Sensor.numRays, 1) * 1.0
        self.drawFirstIntersections(self.frame, firstRaycast)

        randomGlobalGoalButton = QtGui.QPushButton("Initialize Random Global Goal")
        randomGlobalGoalButton.connect("clicked()", self.onRandomGlobalGoalButton)
        l.addWidget(randomGlobalGoalButton)

        randomObstaclesButton = QtGui.QPushButton("Initialize Random Obstacles")
        randomObstaclesButton.connect("clicked()", self.onRandomObstaclesButton)
        l.addWidget(randomObstaclesButton)

        buildWorldFromRandomObstaclesButton = QtGui.QPushButton("Generate Polygon World")
        buildWorldFromRandomObstaclesButton.connect("clicked()", self.onBuildWorldFromRandomObstacles)
        l.addWidget(buildWorldFromRandomObstaclesButton)

        findLocalGoalButton = QtGui.QPushButton("Find Local Goal (Heuristic)")
        findLocalGoalButton.connect("clicked()", self.onFindLocalGoalButton)
        l.addWidget(findLocalGoalButton)

        drawActionSetButton = QtGui.QPushButton("Draw Action Set")
        drawActionSetButton.connect("clicked()", self.onDrawActionSetButton)
        l.addWidget(drawActionSetButton)

        runSimButton = QtGui.QPushButton("Simulate")
        runSimButton.connect("clicked()", self.onRunSimButton)
        l.addWidget(runSimButton)

        playButton = QtGui.QPushButton("Play/Pause")
        playButton.connect("clicked()", self.onPlayButton)

        slider = QtGui.QSlider(QtCore.Qt.Horizontal)
        slider.connect("valueChanged(int)", self.onSliderChanged)
        self.sliderMax = self.numTimesteps
        slider.setMaximum(self.sliderMax)
        self.slider = slider

        # slider4 = QtGui.QSlider(QtCore.Qt.Horizontal)
        # slider4.setMaximum(self.sliderMax)
        # l.addWidget(slider4)

        # slider5 = QtGui.QSlider(QtCore.Qt.Horizontal)
        # slider5.setMaximum(self.sliderMax)
        # l.addWidget(slider5)

        # slider5 = QtGui.QSlider(QtCore.Qt.Horizontal)
        # slider5.setMaximum(self.sliderMax)
        # l.addWidget(slider5)

        # slider6 = QtGui.QSlider(QtCore.Qt.Horizontal)
        # slider6.setMaximum(self.sliderMax)
        # l.addWidget(slider6)

        # slider7 = QtGui.QSlider(QtCore.Qt.Horizontal)
        # slider7.setMaximum(self.sliderMax)
        # l.addWidget(slider7)

        l.addWidget(playButton)
        l.addWidget(slider)

        w = QtGui.QWidget()
        l = QtGui.QVBoxLayout(w)
        l.addWidget(self.view)
        self.view.orientationMarkerWidget().Off()
        l.addWidget(panel)
        w.showMaximized()

        self.frame.connectFrameModified(self.updateDrawIntersection)
        self.updateDrawIntersection(self.frame)

        applogic.resetCamera(viewDirection=[0.2, 0, -1])
        self.view.showMaximized()
        self.view.raise_()
        panel = screengrabberpanel.ScreenGrabberPanel(self.view)
        panel.widget.show()

        cameracontrolpanel.CameraControlPanel(self.view).widget.show()

        elapsed = time.time() - self.startSimTime
        simRate = self.counter / elapsed
        print "Total run time", elapsed
        print "Ticks (Hz)", simRate
        print "Number of steps taken", self.counter
        self.onRandomObstaclesButton()
        self.app.start()

    def drawFirstIntersections(self, frame, firstRaycast):
        origin = np.array(frame.transform.GetPosition())
        d = DebugData()

        firstRaycastLocations = self.Sensor.invertRaycastsToLocations(self.frame, firstRaycast)

        for i in xrange(self.Sensor.numRays):
            endpoint = firstRaycastLocations[i, :]

            if firstRaycast[i] >= self.Sensor.rayLength - 0.1:
                d.addLine(origin, endpoint, color=[0, 1, 0])
            else:
                d.addLine(origin, endpoint, color=[1, 0, 0])

        vis.updatePolyData(d.getPolyData(), "rays", colorByName="RGB255")

        self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations)
        self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData)
        self.locator = self.LineSegmentLocator
        self.Sensor.setLocator(self.LineSegmentLocator)

    def updateDrawIntersection(self, frame, locator="None"):
        if locator == "None":
            locator = self.locator

        origin = np.array(frame.transform.GetPosition())
        # print "origin is now at", origin
        d = DebugData()

        sliderIdx = self.slider.value

        controllerType = self.getControllerTypeFromCounter(sliderIdx)
        colorMaxRange = self.colorMap[controllerType]

        for i in xrange(self.Sensor.numRays):
            ray = self.Sensor.rays[:, i]
            rayTransformed = np.array(frame.transform.TransformNormal(ray))
            # print "rayTransformed is", rayTransformed
            intersection = self.Sensor.raycast(locator, origin, origin + rayTransformed * self.Sensor.rayLength)

            if intersection is not None:
                d.addLine(origin, intersection, color=[1, 0, 0])
            else:
                d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange)

        vis.updatePolyData(d.getPolyData(), "rays", colorByName="RGB255")

        # camera = self.view.camera()
        # camera.SetFocalPoint(frame.transform.GetPosition())
        # camera.SetPosition(frame.transform.TransformPoint((-30,0,10)))

    def getControllerTypeFromCounter(self, counter):
        name = self.controllerTypeOrder[0]

        for controllerType in self.controllerTypeOrder[1:]:
            if counter >= self.idxDict[controllerType]:
                name = controllerType

        return name

    def setRobotFrameState(self, x, y, theta):
        t = vtk.vtkTransform()
        t.Translate(x, y, 0.0)
        t.RotateZ(np.degrees(theta))
        self.robot.getChildFrame().copyFrame(t)

    # returns true if we are in collision
    def checkInCollision(self, raycastDistance=None):
        if raycastDistance is None:
            self.setRobotFrameState(self.Car.state[0], self.Car.state[1], self.Car.state[2])
            raycastDistance = self.Sensor.raycastAll(self.frame)

        if np.min(raycastDistance) < self.collisionThreshold:
            return True
        else:
            return False

        # if raycastDistance[(len(raycastDistance)+1)/2] < self.collisionThreshold:
        #     return True
        # else:
        #     return False

    def tick(self):
        # print timer.elapsed
        # simulate(t.elapsed)
        x = np.sin(time.time())
        y = np.cos(time.time())
        self.setRobotFrameState(x, y, 0.0)
        if (time.time() - self.playTime) > self.endTime:
            self.playTimer.stop()

    def tick2(self):
        newtime = time.time() - self.playTime
        print time.time() - self.playTime
        x = np.sin(newtime)
        y = np.cos(newtime)
        self.setRobotFrameState(x, y, 0.0)

    # just increment the slider, stop the timer if we get to the end
    def playTimerCallback(self):
        self.sliderMovedByPlayTimer = True
        currentIdx = self.slider.value
        nextIdx = currentIdx + 1
        self.slider.setSliderPosition(nextIdx)
        if currentIdx >= self.sliderMax:
            print "reached end of tape, stopping playTime"
            self.playTimer.stop()

    def onSliderChanged(self, value):
        if not self.sliderMovedByPlayTimer:
            self.playTimer.stop()
        numSteps = len(self.stateOverTime)
        idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax)))
        idx = min(idx, numSteps - 1)
        x, y, xdot, ydot = self.stateOverTime[idx]
        self.setRobotFrameState(x, y, 0)
        self.sliderMovedByPlayTimer = False

    def onXVelocityChanged(self, value):

        self.XVelocity_drawing = value
        self.onDrawActionSetButton()
        print "x velocity changed to ", value

    def onYVelocityChanged(self, value):
        print value

        self.YVelocity_drawing = -value
        self.onDrawActionSetButton()
        print "y velocity changed to ", -value

    def onShowSensorsButton(self):
        print "I pressed the show sensors button"
        self.setInitialStateAtZero()
        firstRaycast = np.ones((self.Sensor.numRays, 1)) * 10.0 + np.random.randn(self.Sensor.numRays, 1) * 1.0
        self.drawFirstIntersections(self.frame, firstRaycast)

    def onRandomObstaclesButton(self):
        print "random obstacles button pressed"
        self.setInitialStateAtZero()
        self.world = World.buildLineSegmentTestWorld(
            percentObsDensity=8.0,
            circleRadius=self.options["World"]["circleRadius"],
            nonRandom=False,
            scale=self.options["World"]["scale"],
            randomSeed=self.options["World"]["randomSeed"],
            obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"],
        )

        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        self.Sensor.setLocator(self.locator)
        self.updateDrawIntersection(self.frame, locator=self.locator)

    def onRandomGlobalGoalButton(self):
        print "random global goal button pressed"
        self.globalGoal = World.buildGlobalGoal()

    def onBuildWorldFromRandomObstacles(self):
        distances = self.Sensor.raycastAll(self.frame)
        firstRaycastLocations = self.Sensor.invertRaycastsToLocations(self.frame, distances)

        self.polygon_initial_distances = distances
        self.polygon_initial_raycastLocations = firstRaycastLocations

        self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations)
        self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData)
        self.locator = self.LineSegmentLocator
        self.Sensor.setLocator(self.LineSegmentLocator)
        self.updateDrawIntersection(self.frame)

    def onFindLocalGoalButton(self):
        print "find local goal button pressed"

        local_goal = self.findLocalGoal()
        print local_goal, "is my local goal"

        self.localGoal = World.placeLocalGoal(local_goal)

    def findLocalGoal(self):
        biggest_gap_width = 0
        biggest_gap_start_index = 0

        current_gap_width = 0
        current_gap_start_index = 0

        for index, value in enumerate(self.polygon_initial_distances):

            # if still in a gap
            if value == self.Sensor.rayLength:
                current_gap_width += 1

            # else terminate counting for this gap
            else:
                if current_gap_width > biggest_gap_width:
                    biggest_gap_width = current_gap_width
                    biggest_gap_start_index = current_gap_start_index

                current_gap_width = 0
                current_gap_start_index = index + 1

        if current_gap_width > biggest_gap_width:
            biggest_gap_width = current_gap_width
            biggest_gap_start_index = current_gap_start_index

        middle_index_of_gap = biggest_gap_start_index + biggest_gap_width / 2

        return self.polygon_initial_raycastLocations[middle_index_of_gap, :]

    def onDrawActionSetButton(self):
        print "drawing action set"
        # self.ActionSet.computeFinalPositions(self.XVelocity_drawing,self.YVelocity_drawing)
        self.ActionSet.computeAllPositions(self.XVelocity_drawing, self.YVelocity_drawing)
        # self.ActionSet.drawActionSetFinal()
        self.ActionSet.drawActionSetFull()

    def onRunSimButton(self):
        self.runBatchSimulation()
        self.saveToFile("latest")

    def onPlayButton(self):

        if self.playTimer.isActive():
            self.onPauseButton()
            return

        print "play"
        self.playTimer.start()
        self.playTime = time.time()

    def onPauseButton(self):
        print "pause"
        self.playTimer.stop()

    def saveToFile(self, filename):

        # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime

        filename = "data/" + filename + ".out"
        my_shelf = shelve.open(filename, "n")

        my_shelf["options"] = self.options

        my_shelf["simulationData"] = self.simulationData
        my_shelf["stateOverTime"] = self.stateOverTime
        my_shelf["raycastData"] = self.raycastData
        my_shelf["controlInputData"] = self.controlInputData
        my_shelf["numTimesteps"] = self.numTimesteps
        my_shelf["idxDict"] = self.idxDict
        my_shelf["counter"] = self.counter
        my_shelf.close()

    def run(self, launchApp=True):
        self.counter = 1

        # for use in playback
        self.dt = self.options["dt"]
        self.endTime = self.defaultControllerTime  # used to be the sum of the other times as well

        self.t = np.arange(0.0, self.endTime, self.dt)
        maxNumTimesteps = np.size(self.t)

        self.stateOverTime = np.zeros((maxNumTimesteps + 1, 4))
        self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays))
        self.controlInputData = np.zeros((maxNumTimesteps + 1, 2))
        self.numTimesteps = maxNumTimesteps

        # self.runBatchSimulation()

        if launchApp:
            self.setupPlayback()

    @staticmethod
    def loadFromFile(filename):
        filename = "data/" + filename + ".out"
        sim = Simulator(autoInitialize=False, verbose=False)

        my_shelf = shelve.open(filename)
        sim.options = my_shelf["options"]
        sim.initialize()

        sim.simulationData = my_shelf["simulationData"]
        sim.stateOverTime = np.array(my_shelf["stateOverTime"])
        sim.raycastData = np.array(my_shelf["raycastData"])
        sim.controlInputData = np.array(my_shelf["controlInputData"])
        sim.numTimesteps = my_shelf["numTimesteps"]
        sim.idxDict = my_shelf["idxDict"]
        sim.counter = my_shelf["counter"]

        my_shelf.close()

        return sim
class Simulator(object):
    def __init__(self,
                 percentObsDensity=20,
                 endTime=40,
                 randomizeControl=False,
                 nonRandomWorld=False,
                 circleRadius=0.7,
                 worldScale=1.0,
                 supervisedTrainingTime=500,
                 autoInitialize=True,
                 verbose=True,
                 sarsaType="discrete"):
        self.verbose = verbose
        self.randomizeControl = randomizeControl
        self.startSimTime = time.time()
        self.collisionThreshold = 1.3
        self.randomSeed = 5
        self.Sarsa_numInnerBins = 4
        self.Sarsa_numOuterBins = 4
        self.Sensor_rayLength = 8
        self.sarsaType = sarsaType

        self.percentObsDensity = percentObsDensity
        self.supervisedTrainingTime = 10
        self.learningRandomTime = 10
        self.learningEvalTime = 10
        self.defaultControllerTime = 10
        self.nonRandomWorld = nonRandomWorld
        self.circleRadius = circleRadius
        self.worldScale = worldScale
        # create the visualizer object
        self.app = ConsoleApp()
        # view = app.createView(useGrid=False)
        self.view = self.app.createView(useGrid=False)

        self.initializeOptions()
        self.initializeColorMap()
        if autoInitialize:
            self.initialize()

    def initializeOptions(self):
        self.options = dict()

        self.options['Reward'] = dict()
        self.options['Reward']['actionCost'] = 0.1
        self.options['Reward']['collisionPenalty'] = 100.0
        self.options['Reward']['raycastCost'] = 20.0

        self.options['SARSA'] = dict()
        self.options['SARSA']['type'] = "discrete"
        self.options['SARSA']['lam'] = 0.7
        self.options['SARSA']['alphaStepSize'] = 0.2
        self.options['SARSA']['useQLearningUpdate'] = False
        self.options['SARSA']['epsilonGreedy'] = 0.2
        self.options['SARSA']['burnInTime'] = 500
        self.options['SARSA']['epsilonGreedyExponent'] = 0.3
        self.options['SARSA'][
            'exponentialDiscountFactor'] = 0.05  #so gamma = e^(-rho*dt)
        self.options['SARSA']['numInnerBins'] = 5
        self.options['SARSA']['numOuterBins'] = 4
        self.options['SARSA']['binCutoff'] = 0.5

        self.options['World'] = dict()
        self.options['World']['obstaclesInnerFraction'] = 0.85
        self.options['World']['randomSeed'] = 40
        self.options['World']['percentObsDensity'] = 7.5
        self.options['World']['nonRandomWorld'] = True
        self.options['World']['circleRadius'] = 1.75
        self.options['World']['scale'] = 1.0

        self.options['Sensor'] = dict()
        self.options['Sensor']['rayLength'] = 10
        self.options['Sensor']['numRays'] = 20

        self.options['Car'] = dict()
        self.options['Car']['velocity'] = 12

        self.options['dt'] = 0.05

        self.options['runTime'] = dict()
        self.options['runTime']['supervisedTrainingTime'] = 10
        self.options['runTime']['learningRandomTime'] = 10
        self.options['runTime']['learningEvalTime'] = 10
        self.options['runTime']['defaultControllerTime'] = 10

    def setDefaultOptions(self):

        defaultOptions = dict()

        defaultOptions['Reward'] = dict()
        defaultOptions['Reward']['actionCost'] = 0.1
        defaultOptions['Reward']['collisionPenalty'] = 100.0
        defaultOptions['Reward']['raycastCost'] = 20.0

        defaultOptions['SARSA'] = dict()
        defaultOptions['SARSA']['type'] = "discrete"
        defaultOptions['SARSA']['lam'] = 0.7
        defaultOptions['SARSA']['alphaStepSize'] = 0.2
        defaultOptions['SARSA']['useQLearningUpdate'] = False
        defaultOptions['SARSA']['useSupervisedTraining'] = True
        defaultOptions['SARSA']['epsilonGreedy'] = 0.2
        defaultOptions['SARSA']['burnInTime'] = 500
        defaultOptions['SARSA']['epsilonGreedyExponent'] = 0.3
        defaultOptions['SARSA'][
            'exponentialDiscountFactor'] = 0.05  #so gamma = e^(-rho*dt)
        defaultOptions['SARSA']['numInnerBins'] = 5
        defaultOptions['SARSA']['numOuterBins'] = 4
        defaultOptions['SARSA']['binCutoff'] = 0.5
        defaultOptions['SARSA']['forceDriveStraight'] = True

        defaultOptions['World'] = dict()
        defaultOptions['World']['obstaclesInnerFraction'] = 0.85
        defaultOptions['World']['randomSeed'] = 40
        defaultOptions['World']['percentObsDensity'] = 7.5
        defaultOptions['World']['nonRandomWorld'] = True
        defaultOptions['World']['circleRadius'] = 1.75
        defaultOptions['World']['scale'] = 1.0

        defaultOptions['Sensor'] = dict()
        defaultOptions['Sensor']['rayLength'] = 10
        defaultOptions['Sensor']['numRays'] = 20

        defaultOptions['Car'] = dict()
        defaultOptions['Car']['velocity'] = 12

        defaultOptions['dt'] = 0.05

        defaultOptions['runTime'] = dict()
        defaultOptions['runTime']['supervisedTrainingTime'] = 10
        defaultOptions['runTime']['learningRandomTime'] = 10
        defaultOptions['runTime']['learningEvalTime'] = 10
        defaultOptions['runTime']['defaultControllerTime'] = 10

        for k in defaultOptions:
            self.options.setdefault(k, defaultOptions[k])

        for k in defaultOptions:
            if not isinstance(defaultOptions[k], dict):
                continue

            for j in defaultOptions[k]:
                self.options[k].setdefault(j, defaultOptions[k][j])

    def initializeColorMap(self):
        self.colorMap = dict()
        self.colorMap['defaultRandom'] = [0, 0, 1]
        self.colorMap['learnedRandom'] = [1.0, 0.54901961,
                                          0.]  # this is orange
        self.colorMap['learned'] = [0.58039216, 0.0,
                                    0.82745098]  # this is yellow
        self.colorMap['default'] = [0, 1, 0]

    def initialize(self):

        self.dt = self.options['dt']
        self.controllerTypeOrder = [
            'defaultRandom', 'learnedRandom', 'learned', 'default'
        ]

        self.setDefaultOptions()

        self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'],
                                numRays=self.options['Sensor']['numRays'])
        self.Controller = ControllerObj(self.Sensor)
        self.Car = CarPlant(controller=self.Controller,
                            velocity=self.options['Car']['velocity'])
        self.Reward = Reward(
            self.Sensor,
            collisionThreshold=self.collisionThreshold,
            actionCost=self.options['Reward']['actionCost'],
            collisionPenalty=self.options['Reward']['collisionPenalty'],
            raycastCost=self.options['Reward']['raycastCost'])
        self.setSARSA()

        # create the things needed for simulation
        om.removeFromObjectModel(om.findObjectByName('world'))
        self.world = World.buildCircleWorld(
            percentObsDensity=self.options['World']['percentObsDensity'],
            circleRadius=self.options['World']['circleRadius'],
            nonRandom=self.options['World']['nonRandomWorld'],
            scale=self.options['World']['scale'],
            randomSeed=self.options['World']['randomSeed'],
            obstaclesInnerFraction=self.options['World']
            ['obstaclesInnerFraction'])

        om.removeFromObjectModel(om.findObjectByName('robot'))
        self.robot, self.frame = World.buildRobot()
        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        self.Sensor.setLocator(self.locator)
        self.frame = self.robot.getChildFrame()
        self.frame.setProperty('Scale', 3)
        self.frame.setProperty('Edit', True)
        self.frame.widget.HandleRotationEnabledOff()
        rep = self.frame.widget.GetRepresentation()
        rep.SetTranslateAxisEnabled(2, False)
        rep.SetRotateAxisEnabled(0, False)
        rep.SetRotateAxisEnabled(1, False)

        self.supervisedTrainingTime = self.options['runTime'][
            'supervisedTrainingTime']
        self.learningRandomTime = self.options['runTime']['learningRandomTime']
        self.learningEvalTime = self.options['runTime']['learningEvalTime']
        self.defaultControllerTime = self.options['runTime'][
            'defaultControllerTime']

        self.Car.setFrame(self.frame)
        print "Finished initialization"

    def setSARSA(self, type=None):
        if type is None:
            type = self.options['SARSA']['type']

        if type == "discrete":
            self.Sarsa = SARSADiscrete(
                sensorObj=self.Sensor,
                actionSet=self.Controller.actionSet,
                collisionThreshold=self.collisionThreshold,
                alphaStepSize=self.options['SARSA']['alphaStepSize'],
                useQLearningUpdate=self.options['SARSA']['useQLearningUpdate'],
                lam=self.options['SARSA']['lam'],
                numInnerBins=self.options['SARSA']['numInnerBins'],
                numOuterBins=self.options['SARSA']['numOuterBins'],
                binCutoff=self.options['SARSA']['binCutoff'],
                burnInTime=self.options['SARSA']['burnInTime'],
                epsilonGreedy=self.options['SARSA']['epsilonGreedy'],
                epsilonGreedyExponent=self.options['SARSA']
                ['epsilonGreedyExponent'],
                forceDriveStraight=self.options['SARSA']['forceDriveStraight'])
        elif type == "continuous":
            self.Sarsa = SARSAContinuous(
                sensorObj=self.Sensor,
                actionSet=self.Controller.actionSet,
                lam=self.options['SARSA']['lam'],
                alphaStepSize=self.options['SARSA']['alphaStepSize'],
                collisionThreshold=self.collisionThreshold,
                burnInTime=self.options['SARSA']['burnInTime'],
                epsilonGreedy=self.options['SARSA']['epsilonGreedy'],
                epsilonGreedyExponent=self.options['SARSA']
                ['epsilonGreedyExponent'])
        else:
            raise ValueError(
                "sarsa type must be either discrete or continuous")

    def runSingleSimulation(self,
                            updateQValues=True,
                            controllerType='default',
                            simulationCutoff=None):

        if self.verbose:
            print "using QValue based controller = ", useQValueController

        self.setCollisionFreeInitialState()

        currentCarState = np.copy(self.Car.state)
        nextCarState = np.copy(self.Car.state)
        self.setRobotFrameState(currentCarState[0], currentCarState[1],
                                currentCarState[2])
        currentRaycast = self.Sensor.raycastAll(self.frame)
        nextRaycast = np.zeros(self.Sensor.numRays)
        self.Sarsa.resetElibilityTraces()

        # record the reward data
        runData = dict()
        reward = 0
        discountedReward = 0
        avgReward = 0
        startIdx = self.counter

        while (self.counter < self.numTimesteps - 1):
            idx = self.counter
            currentTime = self.t[idx]
            self.stateOverTime[idx, :] = currentCarState
            x = self.stateOverTime[idx, 0]
            y = self.stateOverTime[idx, 1]
            theta = self.stateOverTime[idx, 2]
            self.setRobotFrameState(x, y, theta)
            # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2])
            currentRaycast = self.Sensor.raycastAll(self.frame)
            self.raycastData[idx, :] = currentRaycast
            S_current = (currentCarState, currentRaycast)

            if controllerType not in self.colorMap.keys():
                print
                raise ValueError("controller of type " + controllerType +
                                 " not supported")

            if controllerType in ["default", "defaultRandom"]:
                if controllerType == "defaultRandom":
                    controlInput, controlInputIdx = self.Controller.computeControlInput(
                        currentCarState,
                        currentTime,
                        self.frame,
                        raycastDistance=currentRaycast,
                        randomize=True)
                else:
                    controlInput, controlInputIdx = self.Controller.computeControlInput(
                        currentCarState,
                        currentTime,
                        self.frame,
                        raycastDistance=currentRaycast,
                        randomize=False)

            if controllerType in ["learned", "learnedRandom"]:
                if controllerType == "learned":
                    randomizeControl = False
                else:
                    randomizeControl = True

                counterForGreedyDecay = self.counter - self.idxDict[
                    "learnedRandom"]
                controlInput, controlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(
                    S_current,
                    counter=counterForGreedyDecay,
                    randomize=randomizeControl)

                self.emptyQValue[idx] = emptyQValue
                if emptyQValue and self.options['SARSA'][
                        'useSupervisedTraining']:
                    controlInput, controlInputIdx = self.Controller.computeControlInput(
                        currentCarState,
                        currentTime,
                        self.frame,
                        raycastDistance=currentRaycast,
                        randomize=False)

            self.controlInputData[idx] = controlInput

            nextCarState = self.Car.simulateOneStep(controlInput=controlInput,
                                                    dt=self.dt)

            # want to compute nextRaycast so we can do the SARSA algorithm
            x = nextCarState[0]
            y = nextCarState[1]
            theta = nextCarState[2]
            self.setRobotFrameState(x, y, theta)
            nextRaycast = self.Sensor.raycastAll(self.frame)

            # Compute the next control input
            S_next = (nextCarState, nextRaycast)

            if controllerType in ["default", "defaultRandom"]:
                if controllerType == "defaultRandom":
                    nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                        nextCarState,
                        currentTime,
                        self.frame,
                        raycastDistance=nextRaycast,
                        randomize=True)
                else:
                    nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                        nextCarState,
                        currentTime,
                        self.frame,
                        raycastDistance=nextRaycast,
                        randomize=False)

            if controllerType in ["learned", "learnedRandom"]:
                if controllerType == "learned":
                    randomizeControl = False
                else:
                    randomizeControl = True

                counterForGreedyDecay = self.counter - self.idxDict[
                    "learnedRandom"]
                nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(
                    S_next,
                    counter=counterForGreedyDecay,
                    randomize=randomizeControl)

                if emptyQValue and self.options['SARSA'][
                        'useSupervisedTraining']:
                    nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                        nextCarState,
                        currentTime,
                        self.frame,
                        raycastDistance=nextRaycast,
                        randomize=False)

            # if useQValueController:
            #     nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(S_next)
            #
            # if not useQValueController or emptyQValue:
            #     nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState, currentTime, self.frame,
            #                                                                             raycastDistance=nextRaycast)

            # compute the reward
            reward = self.Reward.computeReward(S_next, controlInput)
            self.rewardData[idx] = reward

            discountedReward += self.Sarsa.gamma**(self.counter -
                                                   startIdx) * reward
            avgReward += reward

            ###### SARSA update
            if updateQValues:
                self.Sarsa.sarsaUpdate(S_current, controlInputIdx, reward,
                                       S_next, nextControlInputIdx)

            #bookkeeping
            currentCarState = nextCarState
            currentRaycast = nextRaycast
            self.counter += 1
            # break if we are in collision
            if self.checkInCollision(nextRaycast):
                if self.verbose:
                    print "Had a collision, terminating simulation"
                break

            if self.counter >= simulationCutoff:
                break

        # fill in the last state by hand
        self.stateOverTime[self.counter, :] = currentCarState
        self.raycastData[self.counter, :] = currentRaycast

        # return the total reward
        avgRewardNoCollisionPenalty = avgReward - reward
        avgReward = avgReward * 1.0 / max(1, self.counter - startIdx)
        avgRewardNoCollisionPenalty = avgRewardNoCollisionPenalty * 1.0 / max(
            1, self.counter - startIdx)
        # this extra multiplication is so that it is in the same "units" as avgReward
        runData['discountedReward'] = discountedReward * (1 - self.Sarsa.gamma)
        runData['avgReward'] = avgReward
        runData['avgRewardNoCollisionPenalty'] = avgRewardNoCollisionPenalty

        # this just makes sure we don't get stuck in an infinite loop.
        if startIdx == self.counter:
            self.counter += 1

        return runData

    def setNumpyRandomSeed(self, seed=1):
        np.random.seed(seed)

    def runBatchSimulation(self, endTime=None, dt=0.05):

        # for use in playback
        self.dt = self.options['dt']
        self.Sarsa.setDiscountFactor(dt)

        self.endTime = self.supervisedTrainingTime + self.learningRandomTime + self.learningEvalTime + self.defaultControllerTime

        self.t = np.arange(0.0, self.endTime, dt)
        maxNumTimesteps = np.size(self.t)
        self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3))
        self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays))
        self.controlInputData = np.zeros(maxNumTimesteps + 1)
        self.rewardData = np.zeros(maxNumTimesteps + 1)
        self.emptyQValue = np.zeros(maxNumTimesteps + 1, dtype='bool')
        self.numTimesteps = maxNumTimesteps

        self.controllerTypeOrder = [
            'defaultRandom', 'learnedRandom', 'learned', 'default'
        ]
        self.counter = 0
        self.simulationData = []

        self.initializeStatusBar()

        self.idxDict = dict()
        numRunsCounter = 0

        # three while loops for different phases of simulation, supervisedTraining, learning, default
        self.idxDict['defaultRandom'] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.supervisedTrainingTime / dt,
                        self.numTimesteps)
        while ((self.counter - loopStartIdx < self.supervisedTrainingTime / dt)
               and self.counter < self.numTimesteps):
            self.printStatusBar()
            useQValueController = False
            startIdx = self.counter
            runData = self.runSingleSimulation(updateQValues=True,
                                               controllerType='defaultRandom',
                                               simulationCutoff=simCutoff)

            runData['startIdx'] = startIdx
            runData['controllerType'] = "defaultRandom"
            runData['duration'] = self.counter - runData['startIdx']
            runData['endIdx'] = self.counter
            runData['runNumber'] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        self.idxDict['learnedRandom'] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.learningRandomTime / dt,
                        self.numTimesteps)
        while ((self.counter - loopStartIdx < self.learningRandomTime / dt)
               and self.counter < self.numTimesteps):
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(updateQValues=True,
                                               controllerType='learnedRandom',
                                               simulationCutoff=simCutoff)
            runData['startIdx'] = startIdx
            runData['controllerType'] = "learnedRandom"
            runData['duration'] = self.counter - runData['startIdx']
            runData['endIdx'] = self.counter
            runData['runNumber'] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        self.idxDict['learned'] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.learningEvalTime / dt,
                        self.numTimesteps)
        while ((self.counter - loopStartIdx < self.learningEvalTime / dt)
               and self.counter < self.numTimesteps):

            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(updateQValues=False,
                                               controllerType='learned',
                                               simulationCutoff=simCutoff)
            runData['startIdx'] = startIdx
            runData['controllerType'] = "learned"
            runData['duration'] = self.counter - runData['startIdx']
            runData['endIdx'] = self.counter
            runData['runNumber'] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        self.idxDict['default'] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.defaultControllerTime / dt,
                        self.numTimesteps)
        while ((self.counter - loopStartIdx < self.defaultControllerTime / dt)
               and self.counter < self.numTimesteps - 1):
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(updateQValues=False,
                                               controllerType='default',
                                               simulationCutoff=simCutoff)
            runData['startIdx'] = startIdx
            runData['controllerType'] = "default"
            runData['duration'] = self.counter - runData['startIdx']
            runData['endIdx'] = self.counter
            runData['runNumber'] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        # BOOKKEEPING
        # truncate stateOverTime, raycastData, controlInputs to be the correct size
        self.numTimesteps = self.counter + 1
        self.stateOverTime = self.stateOverTime[0:self.counter + 1, :]
        self.raycastData = self.raycastData[0:self.counter + 1, :]
        self.controlInputData = self.controlInputData[0:self.counter + 1]
        self.rewardData = self.rewardData[0:self.counter + 1]
        self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime

    def initializeStatusBar(self):
        self.numTicks = 10
        self.nextTickComplete = 1.0 / float(self.numTicks)
        self.nextTickIdx = 1
        print "Simulation percentage complete: (", self.numTicks, " # is complete)"

    def printStatusBar(self):
        fractionDone = float(self.counter) / float(self.numTimesteps)
        if fractionDone > self.nextTickComplete:

            self.nextTickIdx += 1
            self.nextTickComplete += 1.0 / self.numTicks

            timeSoFar = time.time() - self.startSimTime
            estimatedTimeLeft_sec = (1 -
                                     fractionDone) * timeSoFar / fractionDone
            estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0

            print "#" * self.nextTickIdx, "-" * (
                self.numTicks - self.nextTickIdx
            ), "estimated", estimatedTimeLeft_minutes, "minutes left"

    def setCollisionFreeInitialState(self):
        tol = 5

        while True:
            x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol,
                                  1)[0]
            y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol,
                                  1)[0]
            theta = np.random.uniform(0, 2 * np.pi, 1)[0]
            self.Car.setCarState(x, y, theta)
            self.setRobotFrameState(x, y, theta)

            if not self.checkInCollision():
                break

        # if self.checkInCollision():
        #     print "IN COLLISION"
        # else:
        #     print "COLLISION FREE"

        return x, y, theta

    def setupPlayback(self):

        self.timer = TimerCallback(targetFps=30)
        self.timer.callback = self.tick

        playButtonFps = 1.0 / self.dt
        print "playButtonFPS", playButtonFps
        self.playTimer = TimerCallback(targetFps=playButtonFps)
        self.playTimer.callback = self.playTimerCallback
        self.sliderMovedByPlayTimer = False

        panel = QtGui.QWidget()
        l = QtGui.QHBoxLayout(panel)

        playButton = QtGui.QPushButton('Play/Pause')
        playButton.connect('clicked()', self.onPlayButton)

        slider = QtGui.QSlider(QtCore.Qt.Horizontal)
        slider.connect('valueChanged(int)', self.onSliderChanged)
        self.sliderMax = self.numTimesteps
        slider.setMaximum(self.sliderMax)
        self.slider = slider

        l.addWidget(playButton)
        l.addWidget(slider)

        w = QtGui.QWidget()
        l = QtGui.QVBoxLayout(w)
        l.addWidget(self.view)
        l.addWidget(panel)
        w.showMaximized()

        self.frame.connectFrameModified(self.updateDrawIntersection)
        self.updateDrawIntersection(self.frame)

        applogic.resetCamera(viewDirection=[0.2, 0, -1])
        self.view.showMaximized()
        self.view.raise_()
        panel = screengrabberpanel.ScreenGrabberPanel(self.view)
        panel.widget.show()

        elapsed = time.time() - self.startSimTime
        simRate = self.counter / elapsed
        print "Total run time", elapsed
        print "Ticks (Hz)", simRate
        print "Number of steps taken", self.counter
        self.app.start()

    def run(self, launchApp=True):
        self.counter = 1
        self.runBatchSimulation()
        # self.Sarsa.plotWeights()

        if launchApp:
            self.setupPlayback()

    def updateDrawIntersection(self, frame):

        origin = np.array(frame.transform.GetPosition())
        #print "origin is now at", origin
        d = DebugData()

        sliderIdx = self.slider.value

        controllerType = self.getControllerTypeFromCounter(sliderIdx)
        colorMaxRange = self.colorMap[controllerType]

        # if the QValue was empty then color it green
        if self.emptyQValue[sliderIdx]:
            colorMaxRange = [1, 1, 0]  # this is yellow

        for i in xrange(self.Sensor.numRays):
            ray = self.Sensor.rays[:, i]
            rayTransformed = np.array(frame.transform.TransformNormal(ray))
            #print "rayTransformed is", rayTransformed
            intersection = self.Sensor.raycast(
                self.locator, origin,
                origin + rayTransformed * self.Sensor.rayLength)

            if intersection is not None:
                d.addLine(origin, intersection, color=[1, 0, 0])
            else:
                d.addLine(origin,
                          origin + rayTransformed * self.Sensor.rayLength,
                          color=colorMaxRange)

        vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255')

        #camera = self.view.camera()
        #camera.SetFocalPoint(frame.transform.GetPosition())
        #camera.SetPosition(frame.transform.TransformPoint((-30,0,10)))

    def getControllerTypeFromCounter(self, counter):
        name = self.controllerTypeOrder[0]

        for controllerType in self.controllerTypeOrder[1:]:
            if counter >= self.idxDict[controllerType]:
                name = controllerType

        return name

    def setRobotFrameState(self, x, y, theta):
        t = vtk.vtkTransform()
        t.Translate(x, y, 0.0)
        t.RotateZ(np.degrees(theta))
        self.robot.getChildFrame().copyFrame(t)

    # returns true if we are in collision
    def checkInCollision(self, raycastDistance=None):
        if raycastDistance is None:
            self.setRobotFrameState(self.Car.state[0], self.Car.state[1],
                                    self.Car.state[2])
            raycastDistance = self.Sensor.raycastAll(self.frame)

        if np.min(raycastDistance) < self.collisionThreshold:
            return True
        else:
            return False

    def tick(self):
        #print timer.elapsed
        #simulate(t.elapsed)
        x = np.sin(time.time())
        y = np.cos(time.time())
        self.setRobotFrameState(x, y, 0.0)
        if (time.time() - self.playTime) > self.endTime:
            self.playTimer.stop()

    def tick2(self):
        newtime = time.time() - self.playTime
        print time.time() - self.playTime
        x = np.sin(newtime)
        y = np.cos(newtime)
        self.setRobotFrameState(x, y, 0.0)

    # just increment the slider, stop the timer if we get to the end
    def playTimerCallback(self):
        self.sliderMovedByPlayTimer = True
        currentIdx = self.slider.value
        nextIdx = currentIdx + 1
        self.slider.setSliderPosition(nextIdx)
        if currentIdx >= self.sliderMax:
            print "reached end of tape, stopping playTime"
            self.playTimer.stop()

    def onSliderChanged(self, value):
        if not self.sliderMovedByPlayTimer:
            self.playTimer.stop()
        numSteps = len(self.stateOverTime)
        idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax)))
        idx = min(idx, numSteps - 1)
        x, y, theta = self.stateOverTime[idx]
        self.setRobotFrameState(x, y, theta)
        self.sliderMovedByPlayTimer = False

    def onPlayButton(self):

        if self.playTimer.isActive():
            self.onPauseButton()
            return

        print 'play'
        self.playTimer.start()
        self.playTime = time.time()

    def onPauseButton(self):
        print 'pause'
        self.playTimer.stop()

    def computeRunStatistics(self):
        numRuns = len(self.simulationData)
        runStart = np.zeros(numRuns)
        runDuration = np.zeros(numRuns)
        grid = np.arange(1, numRuns + 1)
        discountedReward = np.zeros(numRuns)
        avgReward = np.zeros(numRuns)
        avgRewardNoCollisionPenalty = np.zeros(numRuns)

        idxMap = dict()

        for controllerType, color in self.colorMap.iteritems():
            idxMap[controllerType] = np.zeros(numRuns, dtype=bool)

        for idx, val in enumerate(self.simulationData):
            runStart[idx] = val['startIdx']
            runDuration[idx] = val['duration']
            discountedReward[idx] = val['discountedReward']
            avgReward[idx] = val['avgReward']
            avgRewardNoCollisionPenalty[idx] = val[
                'avgRewardNoCollisionPenalty']
            controllerType = val['controllerType']
            idxMap[controllerType][idx] = True

    def plotRunData(self,
                    controllerTypeToPlot=None,
                    showPlot=True,
                    onlyLearned=False):

        if controllerTypeToPlot == None:
            controllerTypeToPlot = self.colorMap.keys()

        if onlyLearned:
            controllerTypeToPlot = ['learnedRandom', 'learned']

        numRuns = len(self.simulationData)
        runStart = np.zeros(numRuns)
        runDuration = np.zeros(numRuns)
        grid = np.arange(1, numRuns + 1)
        discountedReward = np.zeros(numRuns)
        avgReward = np.zeros(numRuns)
        avgRewardNoCollisionPenalty = np.zeros(numRuns)

        idxMap = dict()

        for controllerType, color in self.colorMap.iteritems():
            idxMap[controllerType] = np.zeros(numRuns, dtype=bool)

        for idx, val in enumerate(self.simulationData):
            runStart[idx] = val['startIdx']
            runDuration[idx] = val['duration']
            discountedReward[idx] = val['discountedReward']
            avgReward[idx] = val['avgReward']
            avgRewardNoCollisionPenalty[idx] = val[
                'avgRewardNoCollisionPenalty']
            controllerType = val['controllerType']
            idxMap[controllerType][idx] = True

            # usedQValueController[idx] = (val['controllerType'] == "QValue")
            # usedDefaultController[idx] = (val['controllerType'] == "default")
            # usedDefaultRandomController[idx] = (val['controllerType'] == "defaultRandom")
            # usedQValueRandomController[idx] = (val['controllerType'] == "QValueRandom")

        self.runStatistics = dict()
        dataMap = {
            'duration': runDuration,
            'discountedReward': discountedReward,
            'avgReward': avgReward,
            'avgRewardNoCollisionPenalty': avgRewardNoCollisionPenalty
        }

        def computeRunStatistics(dataMap):
            for controllerType, idx in idxMap.iteritems():
                d = dict()
                for dataName, dataSet in dataMap.iteritems():
                    # average the appropriate values in dataset
                    d[dataName] = np.sum(
                        dataSet[idx]) / (1.0 * np.size(dataSet[idx]))

                self.runStatistics[controllerType] = d

        computeRunStatistics(dataMap)

        if not showPlot:
            return

        plt.figure()

        #
        # idxDefaultRandom = np.where(usedDefaultRandomController==True)[0]
        # idxQValueController = np.where(usedQValueController==True)[0]
        # idxQValueControllerRandom = np.where(usedQValueControllerRandom==True)[0]
        # idxDefault = np.where(usedDefaultController==True)[0]
        #
        # plotData = dict()
        # plotData['defaultRandom'] = {'idx': idxDefaultRandom, 'color': 'b'}
        # plotData['QValue'] = {'idx': idxQValueController, 'color': 'y'}
        # plotData['default'] = {'idx': idxDefault, 'color': 'g'}

        def scatterPlot(dataToPlot):
            for controllerType in controllerTypeToPlot:
                idx = idxMap[controllerType]
                plt.scatter(grid[idx],
                            dataToPlot[idx],
                            color=self.colorMap[controllerType])

        def barPlot(dataName):
            plt.title(dataName)
            barWidth = 0.5
            barCounter = 0
            index = np.arange(len(controllerTypeToPlot))

            for controllerType in controllerTypeToPlot:
                val = self.runStatistics[controllerType]
                plt.bar(barCounter,
                        val[dataName],
                        barWidth,
                        color=self.colorMap[controllerType],
                        label=controllerType)
                barCounter += 1

            plt.xticks(index + barWidth / 2.0, controllerTypeToPlot)

        plt.subplot(4, 1, 1)
        plt.title('run duration')
        scatterPlot(runDuration)
        # for controllerType, idx in idxMap.iteritems():
        #     plt.scatter(grid[idx], runDuration[idx], color=self.colorMap[controllerType])

        # plt.scatter(runStart[idxDefaultRandom], runDuration[idxDefaultRandom], color='b')
        # plt.scatter(runStart[idxQValueController], runDuration[idxQValueController], color='y')
        # plt.scatter(runStart[idxDefault], runDuration[idxDefault], color='g')
        plt.xlabel('run #')
        plt.ylabel('episode duration')

        plt.subplot(2, 1, 2)
        plt.title('discounted reward')
        scatterPlot(discountedReward)
        # for key, val in plotData.iteritems():
        #     plt.scatter(grid[idx], discountedReward[idx], color=self.colorMap[controllerType])

        # plt.subplot(3,1,3)
        # plt.title("average reward")
        # scatterPlot(avgReward)
        # for key, val in plotData.iteritems():
        #     plt.scatter(grid[val['idx']],avgReward[val['idx']], color=val['color'])

        # plt.subplot(4,1,4)
        # plt.title("average reward no collision penalty")
        # scatterPlot(avgRewardNoCollisionPenalty)
        # # for key, val in plotData.iteritems():
        # #     plt.scatter(grid[val['idx']],avgRewardNoCollisionPenalty[val['idx']], color=val['color'])

        ## plot summary statistics
        plt.figure()

        plt.subplot(2, 1, 1)
        barPlot("duration")

        plt.subplot(2, 1, 2)
        barPlot("discountedReward")

        # plt.subplot(3,1,3)
        # barPlot("avgReward")
        #
        # plt.subplot(4,1,4)
        # barPlot("avgRewardNoCollisionPenalty")

        plt.show()

    def plotMultipleRunData(self,
                            simList,
                            toPlot=['duration', 'discountedReward'],
                            controllerType='learned'):

        plt.figure()
        numPlots = len(toPlot)

        grid = np.arange(len(simList))

        def plot(fieldToPlot, plotNum):
            plt.subplot(numPlots, 1, plotNum)
            plt.title(fieldToPlot)
            val = 0 * grid
            barWidth = 0.5
            barCounter = 0
            for idx, sim in enumerate(simList):
                value = sim.runStatistics[controllerType][fieldToPlot]
                plt.bar(idx, value, barWidth)

        counter = 1
        for fieldToPlot in toPlot:
            plot(fieldToPlot, counter)
            counter += 1

        plt.show()

    def saveToFile(self, filename):

        # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime

        filename = 'data/' + filename + ".out"
        my_shelf = shelve.open(filename, 'n')

        my_shelf['options'] = self.options

        if self.options['SARSA']['type'] == "discrete":
            my_shelf['SARSA_QValues'] = self.Sarsa.QValues

        my_shelf['simulationData'] = self.simulationData
        my_shelf['stateOverTime'] = self.stateOverTime
        my_shelf['raycastData'] = self.raycastData
        my_shelf['controlInputData'] = self.controlInputData
        my_shelf['emptyQValue'] = self.emptyQValue
        my_shelf['numTimesteps'] = self.numTimesteps
        my_shelf['idxDict'] = self.idxDict
        my_shelf['counter'] = self.counter
        my_shelf.close()

    @staticmethod
    def loadFromFile(filename):
        filename = 'data/' + filename + ".out"
        sim = Simulator(autoInitialize=False, verbose=False)

        my_shelf = shelve.open(filename)
        sim.options = my_shelf['options']
        sim.initialize()

        if sim.options['SARSA']['type'] == "discrete":
            sim.Sarsa.QValues = np.array(my_shelf['SARSA_QValues'])

        sim.simulationData = my_shelf['simulationData']
        # sim.runStatistics = my_shelf['runStatistics']
        sim.stateOverTime = np.array(my_shelf['stateOverTime'])
        sim.raycastData = np.array(my_shelf['raycastData'])
        sim.controlInputData = np.array(my_shelf['controlInputData'])
        sim.emptyQValue = np.array(my_shelf['emptyQValue'])
        sim.numTimesteps = my_shelf['numTimesteps']
        sim.idxDict = my_shelf['idxDict']
        sim.counter = my_shelf['counter']

        my_shelf.close()

        return sim
Exemplo n.º 3
0
class Simulator(object):


    def __init__(self, percentObsDensity=20, endTime=40, nonRandomWorld=False,
                 circleRadius=0.7, worldScale=1.0, autoInitialize=True, verbose=True):
        self.verbose = verbose
        self.startSimTime = time.time()
        self.collisionThreshold = 1.0
        self.randomSeed = 5
        self.Sensor_rayLength = 8

        self.percentObsDensity = percentObsDensity
        self.defaultControllerTime = 1000
        self.nonRandomWorld = nonRandomWorld
        self.circleRadius = circleRadius
        self.worldScale = worldScale

        # create the visualizer object
        self.app = ConsoleApp()
        self.view = self.app.createView(useGrid=False)

        self.initializeOptions()
        self.initializeColorMap()

        self.initial_XVelocity = 0.0
        self.initial_YVelocity = 0.0

        self.running_sim = True

        self.currentIdx = 0;
        
        if autoInitialize:
            self.initialize()

    def initializeOptions(self):
        self.options = dict()

        self.options['World'] = dict()
        self.options['World']['obstaclesInnerFraction'] = 0.98
        self.options['World']['randomSeed'] = 40
        self.options['World']['percentObsDensity'] = 25.0
        self.options['World']['nonRandomWorld'] = True
        self.options['World']['circleRadius'] = 0.6
        self.circleRadius = self.options['World']['circleRadius']
        self.options['World']['scale'] = 2.0

        self.options['Sensor'] = dict()
        self.options['Sensor']['rayLength'] = 10
        self.options['Sensor']['numRays'] = 40


        self.options['Car'] = dict()
        self.options['Car']['velocity'] = 20

        self.options['dt'] = 0.05

        self.options['runTime'] = dict()
        self.options['runTime']['defaultControllerTime'] = 100


    def initializeColorMap(self):
        self.colorMap = dict()
        self.colorMap['default'] = [0,1,0]

    def initialize(self):

        self.dt = self.options['dt']
        self.controllerTypeOrder = ['default']

        self.Controller = ControllerObj()

        self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'],
                                numRays=self.options['Sensor']['numRays'])

        self.Car = CarPlant(controller=self.Controller,
                            velocity=self.options['Car']['velocity'])

        self.Controller.initializeVelocity(self.Car.v)

        self.ActionSet = ActionSetObj()

        # create the things needed for simulation
        om.removeFromObjectModel(om.findObjectByName('world'))
        self.world = World.buildCircleWorld(percentObsDensity=self.options['World']['percentObsDensity'],
                                            circleRadius=self.options['World']['circleRadius'],
                                            nonRandom=self.options['World']['nonRandomWorld'],
                                            scale=self.options['World']['scale'],
                                            randomSeed=self.options['World']['randomSeed'],
                                            obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction'])

        om.removeFromObjectModel(om.findObjectByName('robot'))
        self.robot, self.frame = World.buildRobot()
        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        
        #will need to set locator for purple trajectories
        #self.Sensor.setLocator(self.locator)
        self.frame = self.robot.getChildFrame()
        self.frame.setProperty('Scale', 3)
        #self.frame.setProperty('Visible', False)
        #self.frame.setProperty('Edit', True)
        self.frame.widget.HandleRotationEnabledOff()
        rep = self.frame.widget.GetRepresentation()
        rep.SetTranslateAxisEnabled(2, False)
        rep.SetRotateAxisEnabled(0, False)
        rep.SetRotateAxisEnabled(1, False)

        self.defaultControllerTime = self.options['runTime']['defaultControllerTime']

        self.Car.setFrame(self.frame)
        print "Finished initialization"


    def terminalEuclideanCostForTrajectories(self):
        # access the trajectories
        x_traj = self.ActionSet.p_x_trajectories
        y_traj = self.ActionSet.p_y_trajectories


        final_x_positions = x_traj[:,-1]
        final_y_positions = y_traj[:,-1]

        #access the global goal
        global_goal_x = self.globalGoal.global_goal_x
        global_goal_y = self.globalGoal.global_goal_y

        euclideans_vector = []

        # def getKey(item):
        #     return item[1]

        for i in xrange(self.ActionSet.num_x_bins):
            for j in xrange(self.ActionSet.num_y_bins):
                distance = np.sqrt((final_x_positions[i] - global_goal_x)**2 + (final_y_positions[j] - global_goal_y)**2)
                euclideans_vector.append(distance)


        #sorted_ranks_with_indices = sorted(ranks_with_indices,key=getKey)

        return np.array(euclideans_vector)

    def CheckIfCollisionFreeTrajectoryIndex(self, traj_index):
        # accessing the right trajectory
        x_trajectory_to_check = self.ActionSet.p_x_trajectories[traj_index[0]]
        y_trajectory_to_check = self.ActionSet.p_y_trajectories[traj_index[1]]


        #check if anything is too close to the center of the circles
        #print self.world.list_of_circles
        for i in range(len(x_trajectory_to_check)):
            point = ( x_trajectory_to_check[i], y_trajectory_to_check[i]  )
            if not self.CheckIfCollisionFreePoint(point):
                #print "trajectory wasn't free"
                return False 


        return True

    def CheckIfCollisionFreePoint(self, point):

        for circle_center in self.world.list_of_circles:
            #print "circle centers first"
            #print circle_center[0], circle_center[1]
            #print self.circleRadius
            #print point[0], point[1]
            #print (circle_center[0] - point[0])**2 + (circle_center[1]-point[1])**2
            if  ( (circle_center[0] - point[0])**2 + (circle_center[1]-point[1])**2 < self.circleRadius**2):
                #print "I found a point in a circle"
                return False

        if point[0] > self.world.Xmax:
            #print "I would have gone past top wall"
            return False
        if point[0] < self.world.Xmin:
            #print "I would have gone below bottom wall"
            return False
        if point[1] > self.world.Ymax:
            #print "I would have gone to the right of the side wall"
            return False    
        if point[1] < self.world.Ymin:
            #print "I would have gone to the left of the side wall"
            return False

        return True

    def computeProbabilitiesOfCollisionAllTrajectories(self, currentRaycastIntersectionLocations, speed_allowed_matrix):
        probability_vector = []
        indices_vector = []
        for x_index in xrange(self.ActionSet.num_x_bins):
            for y_index in xrange(self.ActionSet.num_y_bins):
                if not speed_allowed_matrix[x_index, y_index]:
                    probability_vector.append(1.0)

                else:
                    probability_of_collision = self.computeProbabilityOfCollisionOneTrajectory(x_index, y_index, currentRaycastIntersectionLocations)
                    probability_vector.append(probability_of_collision)

                indices_vector.append([x_index, y_index])

        return np.array(probability_vector), indices_vector
        

    def computeProbabilityOfCollisionOneTrajectory(self, x_index, y_index, currentRaycastIntersectionLocations):
        probability_no_collision = 1
        for time_step_index, time_step_value in enumerate(self.ActionSet.t_vector):
            for obstacle_center in currentRaycastIntersectionLocations:

                probability_of_collision_step_k_obstacle_j = self.computeProbabilityOfCollisionOneStepOneObstacle(x_index, y_index, time_step_index, time_step_value, obstacle_center)

                probability_no_collision_step_k_obstacle_j = 1 - probability_of_collision_step_k_obstacle_j

                probability_no_collision = probability_no_collision * probability_no_collision_step_k_obstacle_j

        return float(1.0 - probability_no_collision)

    def computeProbabilityOfCollisionOneStepOneObstacle(self, x_index, y_index, time_step_index, time_step_value, obstacle_center):
        volume = 4.18
        Sigma_sensor = np.zeros((3,3))
        np.fill_diagonal(Sigma_sensor, self.sigma_sensor)

        variance_x = (2.5 + abs(self.current_initial_velocity_x*0.1))*time_step_value+0.01
        variance_y = (2.5 + abs(self.current_initial_velocity_y*0.1))*time_step_value+0.01
        variance_z = (1.5)*time_step_value+0.01

        Sigma_robot = np.zeros((3,3))
        np.fill_diagonal(Sigma_sensor, [variance_x, variance_y, variance_z])


        Sigma_C = Sigma_robot + Sigma_sensor

        denominator = np.sqrt(np.linalg.det(2*np.pi*Sigma_C))

        x = self.ActionSet.p_x_trajectories[x_index, time_step_index]
        y = self.ActionSet.p_y_trajectories[y_index, time_step_index]
        z = 0

        x_robot = np.array([x, y, z])
        obstacle_center = np.array(obstacle_center)

        exponent = - 0.5 * np.matrix((x_robot - obstacle_center)) * np.matrix(np.linalg.inv(Sigma_C)) * np.matrix((x_robot - obstacle_center)).T

        return volume / denominator * np.exp(exponent)

    def computeJerkVector(self,controlInput):
        jerk_vector = []
        for x_index in xrange(self.ActionSet.num_x_bins):
            for y_index in xrange(self.ActionSet.num_y_bins):
                last_x_accel = controlInput[0]
                last_y_accel = controlInput[1]

                this_x_accel = self.ActionSet.a_x[x_index]
                this_y_accel = self.ActionSet.a_y[y_index]

                jerk = np.sqrt( (last_x_accel - this_x_accel)**2 + (last_y_accel - this_y_accel)**2 )
                jerk_vector.append(jerk)

        return np.array(jerk_vector) 


    def identifySpeedAllowedTrajectories(self):
        speed_allowed_matrix = np.ones((self.ActionSet.num_x_bins, self.ActionSet.num_y_bins))

        # for x_index in xrange(self.ActionSet.num_x_bins):
        #     for y_index in xrange(self.ActionSet.num_y_bins):

        #         this_x_accel = self.ActionSet.a_x[x_index]
        #         this_y_accel = self.ActionSet.a_y[y_index]

        #         if np.sqrt( (self.current_initial_velocity_x - this_x_accel)**2 + (self.current_initial_velocity_y - this_y_accel)**2) < self.speed_max:
        #             speed_allowed_matrix[x_index,y_index] = 1

        return speed_allowed_matrix


    def runSingleSimulation(self, controllerType='default', simulationCutoff=None):


        self.Car.setCarState(0.0,0.0,self.initial_XVelocity,self.initial_YVelocity)
        self.setRobotFrameState(0.0,0.0,0.0)


        currentCarState = np.copy(self.Car.state)
        nextCarState = np.copy(self.Car.state)
        self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2])

        self.Sensor.setLocator(self.locator)

        firstRaycast = self.Sensor.raycastAll(self.frame)
        firstRaycastLocations = self.Sensor.raycastAllLocations(self.frame)

        nextRaycast = np.zeros(self.Sensor.numRays)

        # record the reward data
        runData = dict()
        startIdx = self.counter

        controlInput = [0,0]


        while (self.counter < self.numTimesteps - 1):
            idx = self.counter
            currentTime = self.t[idx]
            self.stateOverTime[idx,:] = currentCarState
            x = self.stateOverTime[idx,0]
            y = self.stateOverTime[idx,1]
            self.setRobotFrameState(x,y,0.0)
            # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2])

            currentRaycastIntersectionLocations = self.Sensor.raycastLocationsOnlyOfIntersections(self.frame)

            if self.checkInCollision(currentCarState[0], currentCarState[1], currentRaycastIntersectionLocations):
                break
            

            if controllerType not in self.colorMap.keys():
                print
                raise ValueError("controller of type " + controllerType + " not supported")

            # compute all trajectories from action set
            self.ActionSet.computeAllPositions(currentCarState[0], currentCarState[1], currentCarState[2],currentCarState[3])
            self.current_initial_velocity_x = currentCarState[2]
            self.current_initial_velocity_y = currentCarState[3]

            speed_allowed_matrix = self.identifySpeedAllowedTrajectories()

            probability_vector, indices_list = self.computeProbabilitiesOfCollisionAllTrajectories(currentRaycastIntersectionLocations, speed_allowed_matrix)

            euclideans_vector = self.terminalEuclideanCostForTrajectories()

            jerk_vector = self.computeJerkVector(controlInput)

            # k_collision_cost = 10
            # k_terminal_cost = 1
            # k_jerk = 0.1
            # sum_vector = probability_vector*k_collision_cost + euclideans_vector/np.max(euclideans_vector)*k_terminal_cost + k_jerk*jerk_vector/np.max(jerk_vector)
            # min_action_index_in_vector = np.argmin(sum_vector)
            # x_index_to_use = indices_list[min_action_index_in_vector][0]
            # y_index_to_use = indices_list[min_action_index_in_vector][1]

            

            # convert to probability no collision
            probability_vector = np.ones(len(probability_vector)) - probability_vector
            
            # convert distances to amount of progress
            current_distance = np.sqrt((currentCarState[0] - self.globalGoal.global_goal_x)**2 + (currentCarState[1] - self.globalGoal.global_goal_y)**2)
            #print "euclideans vector", euclideans_vector

            euclidean_progress_vector = np.ones(len(euclideans_vector))*current_distance - euclideans_vector
            #print "euclidean progress vector", euclidean_progress_vector
            reward_vector = euclidean_progress_vector - 0.1*jerk_vector/np.max(jerk_vector)
            #print "reward_vector", reward_vector
            
            expected_reward = np.multiply(probability_vector, reward_vector)
            max_action_index_in_vector = np.argmax(expected_reward)
            x_index_to_use = indices_list[max_action_index_in_vector][0]
            y_index_to_use = indices_list[max_action_index_in_vector][1]

        
            self.actionIndicesOverTime[idx,:] = [x_index_to_use, y_index_to_use] 

            x_accel = self.ActionSet.a_x[x_index_to_use]
            y_accel = self.ActionSet.a_y[y_index_to_use]

            controlInput = [x_accel, y_accel]

            self.controlInputData[idx] = controlInput

            nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt)

        
            x = nextCarState[0]
            y = nextCarState[1]
            theta = nextCarState[2]
            self.setRobotFrameState(x,y,theta)
            

            #bookkeeping
            currentCarState = nextCarState
            
            self.counter+=1

            # break if we are in collision

            if self.counter >= simulationCutoff:
                break


        # fill in the last state by hand
        self.stateOverTime[self.counter,:] = currentCarState
        self.actionIndicesOverTime[self.counter,:] = [x_index_to_use, y_index_to_use]


        # this just makes sure we don't get stuck in an infinite loop.
        if startIdx == self.counter:
            self.counter += 1

        return runData

    def setNumpyRandomSeed(self, seed=1):
        np.random.seed(seed)

    def runBatchSimulation(self, endTime=None, dt=0.05):

        # for use in playback
        self.dt = self.options['dt']

        self.endTime = self.defaultControllerTime # used to be the sum of the other times as well

        self.t = np.arange(0.0, self.endTime, dt)
        maxNumTimesteps = np.size(self.t)
        self.stateOverTime = np.zeros((maxNumTimesteps+1, 4))
        self.actionIndicesOverTime = np.zeros((maxNumTimesteps+1, 2))
        self.controlInputData = np.zeros((maxNumTimesteps+1,2))
        self.numTimesteps = maxNumTimesteps

        self.controllerTypeOrder = ['default']
        self.counter = 0
        self.simulationData = []
    
        self.initializeStatusBar()

        self.idxDict = dict()
        numRunsCounter = 0


        self.idxDict['default'] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.defaultControllerTime/dt, self.numTimesteps)
        
        while ((self.counter - loopStartIdx < self.defaultControllerTime/dt) and self.counter < self.numTimesteps-1):
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(controllerType='default',
                                               simulationCutoff=simCutoff)
            runData['startIdx'] = startIdx
            runData['controllerType'] = "default"
            runData['duration'] = self.counter - runData['startIdx']
            runData['endIdx'] = self.counter
            runData['runNumber'] = numRunsCounter
            numRunsCounter+=1
            self.simulationData.append(runData)

        # BOOKKEEPING
        # truncate stateOverTime, raycastData, controlInputs to be the correct size
        self.numTimesteps = self.counter + 1
        self.stateOverTime = self.stateOverTime[0:self.counter+1, :]
        self.actionIndicesOverTime = self.actionIndicesOverTime[0:self.counter+1, :]
        self.controlInputData = self.controlInputData[0:self.counter+1]
        self.endTime = 1.0*self.counter/self.numTimesteps*self.endTime

    def initializeStatusBar(self):
        self.numTicks = 10
        self.nextTickComplete = 1.0 / float(self.numTicks)
        self.nextTickIdx = 1
        print "Simulation percentage complete: (", self.numTicks, " # is complete)"
    
    def printStatusBar(self):
        fractionDone = float(self.counter) / float(self.numTimesteps)
        if fractionDone > self.nextTickComplete:

            self.nextTickIdx += 1
            self.nextTickComplete += 1.0 / self.numTicks

            timeSoFar = time.time() - self.startSimTime 
            estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone
            estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0

            print "#" * self.nextTickIdx, "-" * (self.numTicks - self.nextTickIdx), "estimated", estimatedTimeLeft_minutes, "minutes left"


    def setCollisionFreeInitialState(self):
        tol = 5

        while True:
            
            x = 0.0
            y =   -5.0
            theta = 0 #+ np.random.uniform(0,2*np.pi,1)[0] * 0.01
            
            self.Car.setCarState(x,y,theta)
            self.setRobotFrameState(x,y,theta)

            print "In loop"

            if not self.checkInCollision():
                break
                
        return x,y,theta


    def setRandomCollisionFreeInitialState(self):
        tol = 5

        while True:
            
            x = np.random.uniform(self.world.Xmin+tol, self.world.Xmax-tol, 1)[0]
            y = np.random.uniform(self.world.Ymin+tol, self.world.Ymax-tol, 1)[0]
            #theta = np.random.uniform(0,2*np.pi,1)[0]
            theta = 0 #always forward

            self.Car.setCarState(x,y,self.initial_XVelocity,self.initial_YVelocity)
            self.setRobotFrameState(x,y,theta)

            if not self.checkInCollision():
                break

        self.firstRandomCollisionFreeInitialState_x = x
        self.firstRandomCollisionFreeInitialState_y = y

        return x,y,0,0

    def setupPlayback(self):

        self.timer = TimerCallback(targetFps=30)
        self.timer.callback = self.tick

        playButtonFps = 1.0/self.dt
        print "playButtonFPS", playButtonFps
        self.playTimer = TimerCallback(targetFps=playButtonFps)
        self.playTimer.callback = self.playTimerCallback
        self.sliderMovedByPlayTimer = False

        panel = QtGui.QWidget()
        l = QtGui.QHBoxLayout(panel)

        self.max_velocity = 20.0

        sliderXVelocity = QtGui.QSlider(QtCore.Qt.Horizontal)
        sliderXVelocity.connect('valueChanged(int)', self.onXVelocityChanged)
        sliderXVelocity.setMaximum(self.max_velocity)
        sliderXVelocity.setMinimum(-self.max_velocity)

        l.addWidget(sliderXVelocity)

        sliderYVelocity = QtGui.QSlider(QtCore.Qt.Horizontal)
        sliderYVelocity.connect('valueChanged(int)', self.onYVelocityChanged)
        sliderYVelocity.setMaximum(self.max_velocity)
        sliderYVelocity.setMinimum(-self.max_velocity)
        l.addWidget(sliderYVelocity)

        randomGlobalGoalButton = QtGui.QPushButton('Initialize Random Global Goal')
        randomGlobalGoalButton.connect('clicked()', self.onRandomGlobalGoalButton)
        l.addWidget(randomGlobalGoalButton)

        drawActionSetButton = QtGui.QPushButton('Draw Action Set')
        drawActionSetButton.connect('clicked()', self.onDrawActionSetButton)
        l.addWidget(drawActionSetButton)

        runSimButton = QtGui.QPushButton('Simulate')
        runSimButton.connect('clicked()', self.onRunSimButton)
        l.addWidget(runSimButton)

        playButton = QtGui.QPushButton('Play/Pause')
        playButton.connect('clicked()', self.onPlayButton)

        slider = QtGui.QSlider(QtCore.Qt.Horizontal)
        slider.connect('valueChanged(int)', self.onSliderChanged)
        self.sliderMax = self.numTimesteps
        slider.setMaximum(self.sliderMax)
        self.slider = slider

        l.addWidget(playButton)
        l.addWidget(slider)

        toggleSensorNoiseButton = QtGui.QPushButton('Toggle Sensor Noise')
        toggleSensorNoiseButton.connect('clicked()', self.onToggleSensorNoise)
        l.addWidget(toggleSensorNoiseButton)

        showObstaclesButton = QtGui.QPushButton('Show Obsatacles')
        showObstaclesButton.connect('clicked()', self.onShowObstacles)
        l.addWidget(showObstaclesButton)

        w = QtGui.QWidget()
        l = QtGui.QVBoxLayout(w)
        l.addWidget(self.view)
        self.view.orientationMarkerWidget().Off()
        l.addWidget(panel)
        w.showMaximized()

        # need to connect frames with DrawActionSet
        self.frame.connectFrameModified(self.updateDrawActionSet)
        self.updateDrawActionSet(self.frame)
        self.frame.connectFrameModified(self.updateDrawIntersection)
        self.updateDrawIntersection(self.frame)
        
        applogic.resetCamera(viewDirection=[0.2,0,-1])
        self.view.showMaximized()
        self.view.raise_()
        panel = screengrabberpanel.ScreenGrabberPanel(self.view)
        panel.widget.show()

        cameracontrolpanel.CameraControlPanel(self.view).widget.show()

        elapsed = time.time() - self.startSimTime
        simRate = self.counter/elapsed
        print "Total run time", elapsed
        print "Ticks (Hz)", simRate
        print "Number of steps taken", self.counter
        self.app.start()

    def run(self, launchApp=True):
        self.sphere_toggle = False
        self.sigma_sensor = 0.5
        self.speed_max = 10.0


        self.running_sim = True
        self.onRandomGlobalGoalButton()
        self.counter = 1
        self.runBatchSimulation()
        self.running_sim = False

        if launchApp:
            self.setupPlayback()

    def onToggleSensorNoise(self):
        self.sphere_toggle = not self.sphere_toggle

    def onShowObstacles(self):
        print "time to show obstacles"
        A = World.buildCircleWorld(percentObsDensity=self.options['World']['percentObsDensity'],
                                            circleRadius=self.options['World']['circleRadius'],
                                            nonRandom=self.options['World']['nonRandomWorld'],
                                            scale=self.options['World']['scale'],
                                            randomSeed=self.options['World']['randomSeed'],
                                            obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction'], alpha=1.0)


    def drawFirstIntersections(self, frame, firstRaycast):
        origin = np.array(frame.transform.GetPosition())
        d = DebugData()

        firstRaycastLocations = self.Sensor.invertRaycastsToLocations(self.frame, firstRaycast)

        for i in xrange(self.Sensor.numRays):
            endpoint = firstRaycastLocations[i,:]

            if firstRaycast[i] >= self.Sensor.rayLength - 0.1:
                d.addLine(origin, endpoint, color=[0,1,0])
            else:
                d.addLine(origin, endpoint, color=[1,0,0])

        vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255')

        self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations)
        self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData)
        self.locator = self.LineSegmentLocator
        self.Sensor.setLocator(self.LineSegmentLocator)


    def updateDrawIntersection(self, frame, locator="None"):
        if locator=="None":
            locator = self.locator

        if self.sphere_toggle:
            sphere_radius=self.sigma_sensor
        else:
            sphere_radius=0.0


        origin = np.array(frame.transform.GetPosition())
        #print "origin is now at", origin
        d = DebugData()
        d_sphere=DebugData()

        sliderIdx = self.slider.value

        controllerType = self.getControllerTypeFromCounter(sliderIdx)
        colorMaxRange = self.colorMap[controllerType]

        for i in xrange(self.Sensor.numRays):
            ray = self.Sensor.rays[:,i]
            rayTransformed = np.array(frame.transform.TransformNormal(ray))
            #print "rayTransformed is", rayTransformed
            intersection = self.Sensor.raycast(locator, origin, origin + rayTransformed*self.Sensor.rayLength)

            if intersection is not None:
                d.addLine(origin, intersection, color=[1,0,0])
                d_sphere.addSphere(intersection, radius=sphere_radius, color=[1,0,0])

            else:
                d.addLine(origin, origin+rayTransformed*self.Sensor.rayLength, color=colorMaxRange)
                d_sphere.addSphere(origin, radius=0.0, color=[0,1,0])

        vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255')
        vis.updatePolyData(d_sphere.getPolyData(), 'spheres', colorByName='RGB255', alpha=0.3)



    def getControllerTypeFromCounter(self, counter):
        name = self.controllerTypeOrder[0]

        for controllerType in self.controllerTypeOrder[1:]:
            if counter >= self.idxDict[controllerType]:
                name = controllerType

        return name


    def setRobotFrameState(self, x, y, theta):
        t = vtk.vtkTransform()
        t.Translate(x,y,0.0)
        t.RotateZ(np.degrees(theta))
        self.robot.getChildFrame().copyFrame(t)

    # returns true if we are in collision
    def checkInCollision(self, x, y, raycastLocations):
        return False

        # carState = [x,y,0]
        # for raycastLocation in raycastLocations:
        #     if np.linalg.norm(carState - raycastLocation) < 0.3:
        #         return True

        # return False

    def updateDrawActionSet(self, frame):

        origin = np.array(frame.transform.GetPosition())

        if not self.running_sim:
            self.ActionSet.computeAllPositions(self.stateOverTime[self.currentIdx,0], self.stateOverTime[self.currentIdx,1], self.stateOverTime[self.currentIdx,2],self.stateOverTime[self.currentIdx,3])
            #self.ActionSet.drawActionSetFinal()
            self.ActionSet.drawActionSetFull()
            self.drawChosenFunnel()

    def drawChosenFunnel(self):
        velocity_initial_x = self.stateOverTime[self.currentIdx,2]
        velocity_initial_y = self.stateOverTime[self.currentIdx,3]

        variance_x = 2.5 + abs(velocity_initial_x*0.1)
        variance_y = 2.5 + abs(velocity_initial_y*0.1)
        variance_z = 1.5

        x_index = self.actionIndicesOverTime[self.currentIdx,0]
        y_index = self.actionIndicesOverTime[self.currentIdx,1]

        for i in xrange(0,10):
            time = 0.5/10.0*i
            x_center = self.ActionSet.p_x_trajectories[x_index, i]
            y_center = self.ActionSet.p_y_trajectories[y_index, i]
            z_center = 0.0
            World.buildEllipse(i, [x_center,y_center,z_center], variance_x*time, variance_y*time, variance_z*time, alpha=0.3)


    def tick(self):
        #print timer.elapsed
        #simulate(t.elapsed)
        x = np.sin(time.time())
        y = np.cos(time.time())
        self.setRobotFrameState(x,y,0.0)
        if (time.time() - self.playTime) > self.endTime:
            self.playTimer.stop()

    def tick2(self):
        newtime = time.time() - self.playTime
        print time.time() - self.playTime
        x = np.sin(newtime)
        y = np.cos(newtime)
        self.setRobotFrameState(x,y,0.0)

    # just increment the slider, stop the timer if we get to the end
    def playTimerCallback(self):
        self.sliderMovedByPlayTimer = True
        currentIdx = self.slider.value
        nextIdx = currentIdx + 1
        self.slider.setSliderPosition(nextIdx)
        if currentIdx >= self.sliderMax:
            print "reached end of tape, stopping playTime"
            self.playTimer.stop()

    def onXVelocityChanged(self, value):
        self.initial_XVelocity = value
        print "initial x velocity changed to ", value

    def onYVelocityChanged(self, value):
        self.initial_YVelocity = -value
        print "initial y velocity changed to ", -value

    def onSliderChanged(self, value):
        if not self.sliderMovedByPlayTimer:
            self.playTimer.stop()
        numSteps = len(self.stateOverTime)
        idx = int(np.floor(numSteps*(1.0*value/self.sliderMax)))
        idx = min(idx, numSteps-1)
        self.currentIdx = idx
        x,y, xdot, ydot = self.stateOverTime[idx]
        self.setRobotFrameState(x,y,0)
        self.sliderMovedByPlayTimer = False

    def onRandomGlobalGoalButton(self):
        print "random global goal button pressed"
        self.globalGoal = World.buildGlobalGoal(scale=self.options['World']['scale'])

    def onDrawActionSetButton(self):
        print "drawing action set"
        #self.ActionSet.computeFinalPositions(self.XVelocity_drawing,self.YVelocity_drawing)
        self.ActionSet.computeAllPositions(self.Car.state[0], self.Car.state[1], self.initial_XVelocity, self.initial_YVelocity)
        #self.ActionSet.drawActionSetFinal()
        self.ActionSet.drawActionSetFull()

    def onRunSimButton(self):
        self.running_sim = True
        self.runBatchSimulation()
        self.running_sim = False
        #self.saveToFile("latest")

    def onPlayButton(self):

        if self.playTimer.isActive():
            self.onPauseButton()
            return

        print 'play'
        self.playTimer.start()
        self.playTime = time.time()

    def onPauseButton(self):
        print 'pause'
        self.playTimer.stop()

    def saveToFile(self, filename):

        # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime

        filename = 'data/' + filename + ".out"
        my_shelf = shelve.open(filename,'n')

        my_shelf['options'] = self.options

        my_shelf['simulationData'] = self.simulationData
        my_shelf['stateOverTime'] = self.stateOverTime
        my_shelf['controlInputData'] = self.controlInputData
        my_shelf['numTimesteps'] = self.numTimesteps
        my_shelf['idxDict'] = self.idxDict
        my_shelf['counter'] = self.counter
        my_shelf.close()


    @staticmethod
    def loadFromFile(filename):
        filename = 'data/' + filename + ".out"
        sim = Simulator(autoInitialize=False, verbose=False)

        my_shelf = shelve.open(filename)
        sim.options = my_shelf['options']
        sim.initialize()

        sim.simulationData = my_shelf['simulationData']
        sim.stateOverTime = np.array(my_shelf['stateOverTime'])
        sim.controlInputData = np.array(my_shelf['controlInputData'])
        sim.numTimesteps = my_shelf['numTimesteps']
        sim.idxDict = my_shelf['idxDict']
        sim.counter = my_shelf['counter']

        my_shelf.close()

        return sim
class Simulator(object):
    def __init__(
        self,
        percentObsDensity=20,
        endTime=40,
        randomizeControl=False,
        nonRandomWorld=False,
        circleRadius=0.7,
        worldScale=1.0,
        supervisedTrainingTime=500,
        autoInitialize=True,
        verbose=True,
        sarsaType="discrete",
    ):
        self.verbose = verbose
        self.randomizeControl = randomizeControl
        self.startSimTime = time.time()
        self.collisionThreshold = 1.3
        self.randomSeed = 5
        self.Sarsa_numInnerBins = 4
        self.Sarsa_numOuterBins = 4
        self.Sensor_rayLength = 8
        self.sarsaType = sarsaType

        self.percentObsDensity = percentObsDensity
        self.supervisedTrainingTime = 10
        self.learningRandomTime = 10
        self.learningEvalTime = 10
        self.defaultControllerTime = 10
        self.nonRandomWorld = nonRandomWorld
        self.circleRadius = circleRadius
        self.worldScale = worldScale
        # create the visualizer object
        self.app = ConsoleApp()
        # view = app.createView(useGrid=False)
        self.view = self.app.createView(useGrid=False)

        self.initializeOptions()
        self.initializeColorMap()
        if autoInitialize:
            self.initialize()

    def initializeOptions(self):
        self.options = dict()

        self.options["Reward"] = dict()
        self.options["Reward"]["actionCost"] = 0.1
        self.options["Reward"]["collisionPenalty"] = 100.0
        self.options["Reward"]["raycastCost"] = 20.0

        self.options["SARSA"] = dict()
        self.options["SARSA"]["type"] = "discrete"
        self.options["SARSA"]["lam"] = 0.7
        self.options["SARSA"]["alphaStepSize"] = 0.2
        self.options["SARSA"]["useQLearningUpdate"] = False
        self.options["SARSA"]["epsilonGreedy"] = 0.2
        self.options["SARSA"]["burnInTime"] = 500
        self.options["SARSA"]["epsilonGreedyExponent"] = 0.3
        self.options["SARSA"]["exponentialDiscountFactor"] = 0.05  # so gamma = e^(-rho*dt)
        self.options["SARSA"]["numInnerBins"] = 5
        self.options["SARSA"]["numOuterBins"] = 4
        self.options["SARSA"]["binCutoff"] = 0.5

        self.options["World"] = dict()
        self.options["World"]["obstaclesInnerFraction"] = 0.85
        self.options["World"]["randomSeed"] = 40
        self.options["World"]["percentObsDensity"] = 7.5
        self.options["World"]["nonRandomWorld"] = True
        self.options["World"]["circleRadius"] = 1.75
        self.options["World"]["scale"] = 1.0

        self.options["Sensor"] = dict()
        self.options["Sensor"]["rayLength"] = 10
        self.options["Sensor"]["numRays"] = 20

        self.options["Car"] = dict()
        self.options["Car"]["velocity"] = 12

        self.options["dt"] = 0.05

        self.options["runTime"] = dict()
        self.options["runTime"]["supervisedTrainingTime"] = 10
        self.options["runTime"]["learningRandomTime"] = 10
        self.options["runTime"]["learningEvalTime"] = 10
        self.options["runTime"]["defaultControllerTime"] = 10

    def setDefaultOptions(self):

        defaultOptions = dict()

        defaultOptions["Reward"] = dict()
        defaultOptions["Reward"]["actionCost"] = 0.1
        defaultOptions["Reward"]["collisionPenalty"] = 100.0
        defaultOptions["Reward"]["raycastCost"] = 20.0

        defaultOptions["SARSA"] = dict()
        defaultOptions["SARSA"]["type"] = "discrete"
        defaultOptions["SARSA"]["lam"] = 0.7
        defaultOptions["SARSA"]["alphaStepSize"] = 0.2
        defaultOptions["SARSA"]["useQLearningUpdate"] = False
        defaultOptions["SARSA"]["useSupervisedTraining"] = True
        defaultOptions["SARSA"]["epsilonGreedy"] = 0.2
        defaultOptions["SARSA"]["burnInTime"] = 500
        defaultOptions["SARSA"]["epsilonGreedyExponent"] = 0.3
        defaultOptions["SARSA"]["exponentialDiscountFactor"] = 0.05  # so gamma = e^(-rho*dt)
        defaultOptions["SARSA"]["numInnerBins"] = 5
        defaultOptions["SARSA"]["numOuterBins"] = 4
        defaultOptions["SARSA"]["binCutoff"] = 0.5
        defaultOptions["SARSA"]["forceDriveStraight"] = True

        defaultOptions["World"] = dict()
        defaultOptions["World"]["obstaclesInnerFraction"] = 0.85
        defaultOptions["World"]["randomSeed"] = 40
        defaultOptions["World"]["percentObsDensity"] = 7.5
        defaultOptions["World"]["nonRandomWorld"] = True
        defaultOptions["World"]["circleRadius"] = 1.75
        defaultOptions["World"]["scale"] = 1.0

        defaultOptions["Sensor"] = dict()
        defaultOptions["Sensor"]["rayLength"] = 10
        defaultOptions["Sensor"]["numRays"] = 20

        defaultOptions["Car"] = dict()
        defaultOptions["Car"]["velocity"] = 12

        defaultOptions["dt"] = 0.05

        defaultOptions["runTime"] = dict()
        defaultOptions["runTime"]["supervisedTrainingTime"] = 10
        defaultOptions["runTime"]["learningRandomTime"] = 10
        defaultOptions["runTime"]["learningEvalTime"] = 10
        defaultOptions["runTime"]["defaultControllerTime"] = 10

        for k in defaultOptions:
            self.options.setdefault(k, defaultOptions[k])

        for k in defaultOptions:
            if not isinstance(defaultOptions[k], dict):
                continue

            for j in defaultOptions[k]:
                self.options[k].setdefault(j, defaultOptions[k][j])

    def initializeColorMap(self):
        self.colorMap = dict()
        self.colorMap["defaultRandom"] = [0, 0, 1]
        self.colorMap["learnedRandom"] = [1.0, 0.54901961, 0.0]  # this is orange
        self.colorMap["learned"] = [0.58039216, 0.0, 0.82745098]  # this is yellow
        self.colorMap["default"] = [0, 1, 0]

    def initialize(self):

        self.dt = self.options["dt"]
        self.controllerTypeOrder = ["defaultRandom", "learnedRandom", "learned", "default"]

        self.setDefaultOptions()

        self.Sensor = SensorObj(
            rayLength=self.options["Sensor"]["rayLength"], numRays=self.options["Sensor"]["numRays"]
        )
        self.Controller = ControllerObj(self.Sensor)
        self.Car = CarPlant(controller=self.Controller, velocity=self.options["Car"]["velocity"])
        self.Reward = Reward(
            self.Sensor,
            collisionThreshold=self.collisionThreshold,
            actionCost=self.options["Reward"]["actionCost"],
            collisionPenalty=self.options["Reward"]["collisionPenalty"],
            raycastCost=self.options["Reward"]["raycastCost"],
        )
        self.setSARSA()

        # create the things needed for simulation
        om.removeFromObjectModel(om.findObjectByName("world"))
        self.world = World.buildCircleWorld(
            percentObsDensity=self.options["World"]["percentObsDensity"],
            circleRadius=self.options["World"]["circleRadius"],
            nonRandom=self.options["World"]["nonRandomWorld"],
            scale=self.options["World"]["scale"],
            randomSeed=self.options["World"]["randomSeed"],
            obstaclesInnerFraction=self.options["World"]["obstaclesInnerFraction"],
        )

        om.removeFromObjectModel(om.findObjectByName("robot"))
        self.robot, self.frame = World.buildRobot()
        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        self.Sensor.setLocator(self.locator)
        self.frame = self.robot.getChildFrame()
        self.frame.setProperty("Scale", 3)
        self.frame.setProperty("Edit", True)
        self.frame.widget.HandleRotationEnabledOff()
        rep = self.frame.widget.GetRepresentation()
        rep.SetTranslateAxisEnabled(2, False)
        rep.SetRotateAxisEnabled(0, False)
        rep.SetRotateAxisEnabled(1, False)

        self.supervisedTrainingTime = self.options["runTime"]["supervisedTrainingTime"]
        self.learningRandomTime = self.options["runTime"]["learningRandomTime"]
        self.learningEvalTime = self.options["runTime"]["learningEvalTime"]
        self.defaultControllerTime = self.options["runTime"]["defaultControllerTime"]

        self.Car.setFrame(self.frame)
        print "Finished initialization"

    def setSARSA(self, type=None):
        if type is None:
            type = self.options["SARSA"]["type"]

        if type == "discrete":
            self.Sarsa = SARSADiscrete(
                sensorObj=self.Sensor,
                actionSet=self.Controller.actionSet,
                collisionThreshold=self.collisionThreshold,
                alphaStepSize=self.options["SARSA"]["alphaStepSize"],
                useQLearningUpdate=self.options["SARSA"]["useQLearningUpdate"],
                lam=self.options["SARSA"]["lam"],
                numInnerBins=self.options["SARSA"]["numInnerBins"],
                numOuterBins=self.options["SARSA"]["numOuterBins"],
                binCutoff=self.options["SARSA"]["binCutoff"],
                burnInTime=self.options["SARSA"]["burnInTime"],
                epsilonGreedy=self.options["SARSA"]["epsilonGreedy"],
                epsilonGreedyExponent=self.options["SARSA"]["epsilonGreedyExponent"],
                forceDriveStraight=self.options["SARSA"]["forceDriveStraight"],
            )
        elif type == "continuous":
            self.Sarsa = SARSAContinuous(
                sensorObj=self.Sensor,
                actionSet=self.Controller.actionSet,
                lam=self.options["SARSA"]["lam"],
                alphaStepSize=self.options["SARSA"]["alphaStepSize"],
                collisionThreshold=self.collisionThreshold,
                burnInTime=self.options["SARSA"]["burnInTime"],
                epsilonGreedy=self.options["SARSA"]["epsilonGreedy"],
                epsilonGreedyExponent=self.options["SARSA"]["epsilonGreedyExponent"],
            )
        else:
            raise ValueError("sarsa type must be either discrete or continuous")

    def runSingleSimulation(self, updateQValues=True, controllerType="default", simulationCutoff=None):

        if self.verbose:
            print "using QValue based controller = ", useQValueController

        self.setCollisionFreeInitialState()

        currentCarState = np.copy(self.Car.state)
        nextCarState = np.copy(self.Car.state)
        self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2])
        currentRaycast = self.Sensor.raycastAll(self.frame)
        nextRaycast = np.zeros(self.Sensor.numRays)
        self.Sarsa.resetElibilityTraces()

        # record the reward data
        runData = dict()
        reward = 0
        discountedReward = 0
        avgReward = 0
        startIdx = self.counter

        while self.counter < self.numTimesteps - 1:
            idx = self.counter
            currentTime = self.t[idx]
            self.stateOverTime[idx, :] = currentCarState
            x = self.stateOverTime[idx, 0]
            y = self.stateOverTime[idx, 1]
            theta = self.stateOverTime[idx, 2]
            self.setRobotFrameState(x, y, theta)
            # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2])
            currentRaycast = self.Sensor.raycastAll(self.frame)
            self.raycastData[idx, :] = currentRaycast
            S_current = (currentCarState, currentRaycast)

            if controllerType not in self.colorMap.keys():
                print
                raise ValueError("controller of type " + controllerType + " not supported")

            if controllerType in ["default", "defaultRandom"]:
                if controllerType == "defaultRandom":
                    controlInput, controlInputIdx = self.Controller.computeControlInput(
                        currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=True
                    )
                else:
                    controlInput, controlInputIdx = self.Controller.computeControlInput(
                        currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False
                    )

            if controllerType in ["learned", "learnedRandom"]:
                if controllerType == "learned":
                    randomizeControl = False
                else:
                    randomizeControl = True

                counterForGreedyDecay = self.counter - self.idxDict["learnedRandom"]
                controlInput, controlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(
                    S_current, counter=counterForGreedyDecay, randomize=randomizeControl
                )

                self.emptyQValue[idx] = emptyQValue
                if emptyQValue and self.options["SARSA"]["useSupervisedTraining"]:
                    controlInput, controlInputIdx = self.Controller.computeControlInput(
                        currentCarState, currentTime, self.frame, raycastDistance=currentRaycast, randomize=False
                    )

            self.controlInputData[idx] = controlInput

            nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt)

            # want to compute nextRaycast so we can do the SARSA algorithm
            x = nextCarState[0]
            y = nextCarState[1]
            theta = nextCarState[2]
            self.setRobotFrameState(x, y, theta)
            nextRaycast = self.Sensor.raycastAll(self.frame)

            # Compute the next control input
            S_next = (nextCarState, nextRaycast)

            if controllerType in ["default", "defaultRandom"]:
                if controllerType == "defaultRandom":
                    nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                        nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=True
                    )
                else:
                    nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                        nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False
                    )

            if controllerType in ["learned", "learnedRandom"]:
                if controllerType == "learned":
                    randomizeControl = False
                else:
                    randomizeControl = True

                counterForGreedyDecay = self.counter - self.idxDict["learnedRandom"]
                nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(
                    S_next, counter=counterForGreedyDecay, randomize=randomizeControl
                )

                if emptyQValue and self.options["SARSA"]["useSupervisedTraining"]:
                    nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                        nextCarState, currentTime, self.frame, raycastDistance=nextRaycast, randomize=False
                    )

            # if useQValueController:
            #     nextControlInput, nextControlInputIdx, emptyQValue = self.Sarsa.computeGreedyControlPolicy(S_next)
            #
            # if not useQValueController or emptyQValue:
            #     nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState, currentTime, self.frame,
            #                                                                             raycastDistance=nextRaycast)

            # compute the reward
            reward = self.Reward.computeReward(S_next, controlInput)
            self.rewardData[idx] = reward

            discountedReward += self.Sarsa.gamma ** (self.counter - startIdx) * reward
            avgReward += reward

            ###### SARSA update
            if updateQValues:
                self.Sarsa.sarsaUpdate(S_current, controlInputIdx, reward, S_next, nextControlInputIdx)

            # bookkeeping
            currentCarState = nextCarState
            currentRaycast = nextRaycast
            self.counter += 1
            # break if we are in collision
            if self.checkInCollision(nextRaycast):
                if self.verbose:
                    print "Had a collision, terminating simulation"
                break

            if self.counter >= simulationCutoff:
                break

        # fill in the last state by hand
        self.stateOverTime[self.counter, :] = currentCarState
        self.raycastData[self.counter, :] = currentRaycast

        # return the total reward
        avgRewardNoCollisionPenalty = avgReward - reward
        avgReward = avgReward * 1.0 / max(1, self.counter - startIdx)
        avgRewardNoCollisionPenalty = avgRewardNoCollisionPenalty * 1.0 / max(1, self.counter - startIdx)
        # this extra multiplication is so that it is in the same "units" as avgReward
        runData["discountedReward"] = discountedReward * (1 - self.Sarsa.gamma)
        runData["avgReward"] = avgReward
        runData["avgRewardNoCollisionPenalty"] = avgRewardNoCollisionPenalty

        # this just makes sure we don't get stuck in an infinite loop.
        if startIdx == self.counter:
            self.counter += 1

        return runData

    def setNumpyRandomSeed(self, seed=1):
        np.random.seed(seed)

    def runBatchSimulation(self, endTime=None, dt=0.05):

        # for use in playback
        self.dt = self.options["dt"]
        self.Sarsa.setDiscountFactor(dt)

        self.endTime = (
            self.supervisedTrainingTime + self.learningRandomTime + self.learningEvalTime + self.defaultControllerTime
        )

        self.t = np.arange(0.0, self.endTime, dt)
        maxNumTimesteps = np.size(self.t)
        self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3))
        self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays))
        self.controlInputData = np.zeros(maxNumTimesteps + 1)
        self.rewardData = np.zeros(maxNumTimesteps + 1)
        self.emptyQValue = np.zeros(maxNumTimesteps + 1, dtype="bool")
        self.numTimesteps = maxNumTimesteps

        self.controllerTypeOrder = ["defaultRandom", "learnedRandom", "learned", "default"]
        self.counter = 0
        self.simulationData = []

        self.initializeStatusBar()

        self.idxDict = dict()
        numRunsCounter = 0

        # three while loops for different phases of simulation, supervisedTraining, learning, default
        self.idxDict["defaultRandom"] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.supervisedTrainingTime / dt, self.numTimesteps)
        while (self.counter - loopStartIdx < self.supervisedTrainingTime / dt) and self.counter < self.numTimesteps:
            self.printStatusBar()
            useQValueController = False
            startIdx = self.counter
            runData = self.runSingleSimulation(
                updateQValues=True, controllerType="defaultRandom", simulationCutoff=simCutoff
            )

            runData["startIdx"] = startIdx
            runData["controllerType"] = "defaultRandom"
            runData["duration"] = self.counter - runData["startIdx"]
            runData["endIdx"] = self.counter
            runData["runNumber"] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        self.idxDict["learnedRandom"] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.learningRandomTime / dt, self.numTimesteps)
        while (self.counter - loopStartIdx < self.learningRandomTime / dt) and self.counter < self.numTimesteps:
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(
                updateQValues=True, controllerType="learnedRandom", simulationCutoff=simCutoff
            )
            runData["startIdx"] = startIdx
            runData["controllerType"] = "learnedRandom"
            runData["duration"] = self.counter - runData["startIdx"]
            runData["endIdx"] = self.counter
            runData["runNumber"] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        self.idxDict["learned"] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.learningEvalTime / dt, self.numTimesteps)
        while (self.counter - loopStartIdx < self.learningEvalTime / dt) and self.counter < self.numTimesteps:

            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(
                updateQValues=False, controllerType="learned", simulationCutoff=simCutoff
            )
            runData["startIdx"] = startIdx
            runData["controllerType"] = "learned"
            runData["duration"] = self.counter - runData["startIdx"]
            runData["endIdx"] = self.counter
            runData["runNumber"] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        self.idxDict["default"] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.defaultControllerTime / dt, self.numTimesteps)
        while (self.counter - loopStartIdx < self.defaultControllerTime / dt) and self.counter < self.numTimesteps - 1:
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(
                updateQValues=False, controllerType="default", simulationCutoff=simCutoff
            )
            runData["startIdx"] = startIdx
            runData["controllerType"] = "default"
            runData["duration"] = self.counter - runData["startIdx"]
            runData["endIdx"] = self.counter
            runData["runNumber"] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        # BOOKKEEPING
        # truncate stateOverTime, raycastData, controlInputs to be the correct size
        self.numTimesteps = self.counter + 1
        self.stateOverTime = self.stateOverTime[0 : self.counter + 1, :]
        self.raycastData = self.raycastData[0 : self.counter + 1, :]
        self.controlInputData = self.controlInputData[0 : self.counter + 1]
        self.rewardData = self.rewardData[0 : self.counter + 1]
        self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime

    def initializeStatusBar(self):
        self.numTicks = 10
        self.nextTickComplete = 1.0 / float(self.numTicks)
        self.nextTickIdx = 1
        print "Simulation percentage complete: (", self.numTicks, " # is complete)"

    def printStatusBar(self):
        fractionDone = float(self.counter) / float(self.numTimesteps)
        if fractionDone > self.nextTickComplete:

            self.nextTickIdx += 1
            self.nextTickComplete += 1.0 / self.numTicks

            timeSoFar = time.time() - self.startSimTime
            estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone
            estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0

            print "#" * self.nextTickIdx, "-" * (
                self.numTicks - self.nextTickIdx
            ), "estimated", estimatedTimeLeft_minutes, "minutes left"

    def setCollisionFreeInitialState(self):
        tol = 5

        while True:
            x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol, 1)[0]
            y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol, 1)[0]
            theta = np.random.uniform(0, 2 * np.pi, 1)[0]
            self.Car.setCarState(x, y, theta)
            self.setRobotFrameState(x, y, theta)

            if not self.checkInCollision():
                break

        # if self.checkInCollision():
        #     print "IN COLLISION"
        # else:
        #     print "COLLISION FREE"

        return x, y, theta

    def setupPlayback(self):

        self.timer = TimerCallback(targetFps=30)
        self.timer.callback = self.tick

        playButtonFps = 1.0 / self.dt
        print "playButtonFPS", playButtonFps
        self.playTimer = TimerCallback(targetFps=playButtonFps)
        self.playTimer.callback = self.playTimerCallback
        self.sliderMovedByPlayTimer = False

        panel = QtGui.QWidget()
        l = QtGui.QHBoxLayout(panel)

        playButton = QtGui.QPushButton("Play/Pause")
        playButton.connect("clicked()", self.onPlayButton)

        slider = QtGui.QSlider(QtCore.Qt.Horizontal)
        slider.connect("valueChanged(int)", self.onSliderChanged)
        self.sliderMax = self.numTimesteps
        slider.setMaximum(self.sliderMax)
        self.slider = slider

        l.addWidget(playButton)
        l.addWidget(slider)

        w = QtGui.QWidget()
        l = QtGui.QVBoxLayout(w)
        l.addWidget(self.view)
        l.addWidget(panel)
        w.showMaximized()

        self.frame.connectFrameModified(self.updateDrawIntersection)
        self.updateDrawIntersection(self.frame)

        applogic.resetCamera(viewDirection=[0.2, 0, -1])
        self.view.showMaximized()
        self.view.raise_()
        panel = screengrabberpanel.ScreenGrabberPanel(self.view)
        panel.widget.show()

        elapsed = time.time() - self.startSimTime
        simRate = self.counter / elapsed
        print "Total run time", elapsed
        print "Ticks (Hz)", simRate
        print "Number of steps taken", self.counter
        self.app.start()

    def run(self, launchApp=True):
        self.counter = 1
        self.runBatchSimulation()
        # self.Sarsa.plotWeights()

        if launchApp:
            self.setupPlayback()

    def updateDrawIntersection(self, frame):

        origin = np.array(frame.transform.GetPosition())
        # print "origin is now at", origin
        d = DebugData()

        sliderIdx = self.slider.value

        controllerType = self.getControllerTypeFromCounter(sliderIdx)
        colorMaxRange = self.colorMap[controllerType]

        # if the QValue was empty then color it green
        if self.emptyQValue[sliderIdx]:
            colorMaxRange = [1, 1, 0]  # this is yellow

        for i in xrange(self.Sensor.numRays):
            ray = self.Sensor.rays[:, i]
            rayTransformed = np.array(frame.transform.TransformNormal(ray))
            # print "rayTransformed is", rayTransformed
            intersection = self.Sensor.raycast(self.locator, origin, origin + rayTransformed * self.Sensor.rayLength)

            if intersection is not None:
                d.addLine(origin, intersection, color=[1, 0, 0])
            else:
                d.addLine(origin, origin + rayTransformed * self.Sensor.rayLength, color=colorMaxRange)

        vis.updatePolyData(d.getPolyData(), "rays", colorByName="RGB255")

        # camera = self.view.camera()
        # camera.SetFocalPoint(frame.transform.GetPosition())
        # camera.SetPosition(frame.transform.TransformPoint((-30,0,10)))

    def getControllerTypeFromCounter(self, counter):
        name = self.controllerTypeOrder[0]

        for controllerType in self.controllerTypeOrder[1:]:
            if counter >= self.idxDict[controllerType]:
                name = controllerType

        return name

    def setRobotFrameState(self, x, y, theta):
        t = vtk.vtkTransform()
        t.Translate(x, y, 0.0)
        t.RotateZ(np.degrees(theta))
        self.robot.getChildFrame().copyFrame(t)

    # returns true if we are in collision
    def checkInCollision(self, raycastDistance=None):
        if raycastDistance is None:
            self.setRobotFrameState(self.Car.state[0], self.Car.state[1], self.Car.state[2])
            raycastDistance = self.Sensor.raycastAll(self.frame)

        if np.min(raycastDistance) < self.collisionThreshold:
            return True
        else:
            return False

    def tick(self):
        # print timer.elapsed
        # simulate(t.elapsed)
        x = np.sin(time.time())
        y = np.cos(time.time())
        self.setRobotFrameState(x, y, 0.0)
        if (time.time() - self.playTime) > self.endTime:
            self.playTimer.stop()

    def tick2(self):
        newtime = time.time() - self.playTime
        print time.time() - self.playTime
        x = np.sin(newtime)
        y = np.cos(newtime)
        self.setRobotFrameState(x, y, 0.0)

    # just increment the slider, stop the timer if we get to the end
    def playTimerCallback(self):
        self.sliderMovedByPlayTimer = True
        currentIdx = self.slider.value
        nextIdx = currentIdx + 1
        self.slider.setSliderPosition(nextIdx)
        if currentIdx >= self.sliderMax:
            print "reached end of tape, stopping playTime"
            self.playTimer.stop()

    def onSliderChanged(self, value):
        if not self.sliderMovedByPlayTimer:
            self.playTimer.stop()
        numSteps = len(self.stateOverTime)
        idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax)))
        idx = min(idx, numSteps - 1)
        x, y, theta = self.stateOverTime[idx]
        self.setRobotFrameState(x, y, theta)
        self.sliderMovedByPlayTimer = False

    def onPlayButton(self):

        if self.playTimer.isActive():
            self.onPauseButton()
            return

        print "play"
        self.playTimer.start()
        self.playTime = time.time()

    def onPauseButton(self):
        print "pause"
        self.playTimer.stop()

    def computeRunStatistics(self):
        numRuns = len(self.simulationData)
        runStart = np.zeros(numRuns)
        runDuration = np.zeros(numRuns)
        grid = np.arange(1, numRuns + 1)
        discountedReward = np.zeros(numRuns)
        avgReward = np.zeros(numRuns)
        avgRewardNoCollisionPenalty = np.zeros(numRuns)

        idxMap = dict()

        for controllerType, color in self.colorMap.iteritems():
            idxMap[controllerType] = np.zeros(numRuns, dtype=bool)

        for idx, val in enumerate(self.simulationData):
            runStart[idx] = val["startIdx"]
            runDuration[idx] = val["duration"]
            discountedReward[idx] = val["discountedReward"]
            avgReward[idx] = val["avgReward"]
            avgRewardNoCollisionPenalty[idx] = val["avgRewardNoCollisionPenalty"]
            controllerType = val["controllerType"]
            idxMap[controllerType][idx] = True

    def plotRunData(self, controllerTypeToPlot=None, showPlot=True, onlyLearned=False):

        if controllerTypeToPlot == None:
            controllerTypeToPlot = self.colorMap.keys()

        if onlyLearned:
            controllerTypeToPlot = ["learnedRandom", "learned"]

        numRuns = len(self.simulationData)
        runStart = np.zeros(numRuns)
        runDuration = np.zeros(numRuns)
        grid = np.arange(1, numRuns + 1)
        discountedReward = np.zeros(numRuns)
        avgReward = np.zeros(numRuns)
        avgRewardNoCollisionPenalty = np.zeros(numRuns)

        idxMap = dict()

        for controllerType, color in self.colorMap.iteritems():
            idxMap[controllerType] = np.zeros(numRuns, dtype=bool)

        for idx, val in enumerate(self.simulationData):
            runStart[idx] = val["startIdx"]
            runDuration[idx] = val["duration"]
            discountedReward[idx] = val["discountedReward"]
            avgReward[idx] = val["avgReward"]
            avgRewardNoCollisionPenalty[idx] = val["avgRewardNoCollisionPenalty"]
            controllerType = val["controllerType"]
            idxMap[controllerType][idx] = True

            # usedQValueController[idx] = (val['controllerType'] == "QValue")
            # usedDefaultController[idx] = (val['controllerType'] == "default")
            # usedDefaultRandomController[idx] = (val['controllerType'] == "defaultRandom")
            # usedQValueRandomController[idx] = (val['controllerType'] == "QValueRandom")

        self.runStatistics = dict()
        dataMap = {
            "duration": runDuration,
            "discountedReward": discountedReward,
            "avgReward": avgReward,
            "avgRewardNoCollisionPenalty": avgRewardNoCollisionPenalty,
        }

        def computeRunStatistics(dataMap):
            for controllerType, idx in idxMap.iteritems():
                d = dict()
                for dataName, dataSet in dataMap.iteritems():
                    # average the appropriate values in dataset
                    d[dataName] = np.sum(dataSet[idx]) / (1.0 * np.size(dataSet[idx]))

                self.runStatistics[controllerType] = d

        computeRunStatistics(dataMap)

        if not showPlot:
            return

        plt.figure()

        #
        # idxDefaultRandom = np.where(usedDefaultRandomController==True)[0]
        # idxQValueController = np.where(usedQValueController==True)[0]
        # idxQValueControllerRandom = np.where(usedQValueControllerRandom==True)[0]
        # idxDefault = np.where(usedDefaultController==True)[0]
        #
        # plotData = dict()
        # plotData['defaultRandom'] = {'idx': idxDefaultRandom, 'color': 'b'}
        # plotData['QValue'] = {'idx': idxQValueController, 'color': 'y'}
        # plotData['default'] = {'idx': idxDefault, 'color': 'g'}

        def scatterPlot(dataToPlot):
            for controllerType in controllerTypeToPlot:
                idx = idxMap[controllerType]
                plt.scatter(grid[idx], dataToPlot[idx], color=self.colorMap[controllerType])

        def barPlot(dataName):
            plt.title(dataName)
            barWidth = 0.5
            barCounter = 0
            index = np.arange(len(controllerTypeToPlot))

            for controllerType in controllerTypeToPlot:
                val = self.runStatistics[controllerType]
                plt.bar(barCounter, val[dataName], barWidth, color=self.colorMap[controllerType], label=controllerType)
                barCounter += 1

            plt.xticks(index + barWidth / 2.0, controllerTypeToPlot)

        plt.subplot(4, 1, 1)
        plt.title("run duration")
        scatterPlot(runDuration)
        # for controllerType, idx in idxMap.iteritems():
        #     plt.scatter(grid[idx], runDuration[idx], color=self.colorMap[controllerType])

        # plt.scatter(runStart[idxDefaultRandom], runDuration[idxDefaultRandom], color='b')
        # plt.scatter(runStart[idxQValueController], runDuration[idxQValueController], color='y')
        # plt.scatter(runStart[idxDefault], runDuration[idxDefault], color='g')
        plt.xlabel("run #")
        plt.ylabel("episode duration")

        plt.subplot(2, 1, 2)
        plt.title("discounted reward")
        scatterPlot(discountedReward)
        # for key, val in plotData.iteritems():
        #     plt.scatter(grid[idx], discountedReward[idx], color=self.colorMap[controllerType])

        # plt.subplot(3,1,3)
        # plt.title("average reward")
        # scatterPlot(avgReward)
        # for key, val in plotData.iteritems():
        #     plt.scatter(grid[val['idx']],avgReward[val['idx']], color=val['color'])

        # plt.subplot(4,1,4)
        # plt.title("average reward no collision penalty")
        # scatterPlot(avgRewardNoCollisionPenalty)
        # # for key, val in plotData.iteritems():
        # #     plt.scatter(grid[val['idx']],avgRewardNoCollisionPenalty[val['idx']], color=val['color'])

        ## plot summary statistics
        plt.figure()

        plt.subplot(2, 1, 1)
        barPlot("duration")

        plt.subplot(2, 1, 2)
        barPlot("discountedReward")

        # plt.subplot(3,1,3)
        # barPlot("avgReward")
        #
        # plt.subplot(4,1,4)
        # barPlot("avgRewardNoCollisionPenalty")

        plt.show()

    def plotMultipleRunData(self, simList, toPlot=["duration", "discountedReward"], controllerType="learned"):

        plt.figure()
        numPlots = len(toPlot)

        grid = np.arange(len(simList))

        def plot(fieldToPlot, plotNum):
            plt.subplot(numPlots, 1, plotNum)
            plt.title(fieldToPlot)
            val = 0 * grid
            barWidth = 0.5
            barCounter = 0
            for idx, sim in enumerate(simList):
                value = sim.runStatistics[controllerType][fieldToPlot]
                plt.bar(idx, value, barWidth)

        counter = 1
        for fieldToPlot in toPlot:
            plot(fieldToPlot, counter)
            counter += 1

        plt.show()

    def saveToFile(self, filename):

        # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime

        filename = "data/" + filename + ".out"
        my_shelf = shelve.open(filename, "n")

        my_shelf["options"] = self.options

        if self.options["SARSA"]["type"] == "discrete":
            my_shelf["SARSA_QValues"] = self.Sarsa.QValues

        my_shelf["simulationData"] = self.simulationData
        my_shelf["stateOverTime"] = self.stateOverTime
        my_shelf["raycastData"] = self.raycastData
        my_shelf["controlInputData"] = self.controlInputData
        my_shelf["emptyQValue"] = self.emptyQValue
        my_shelf["numTimesteps"] = self.numTimesteps
        my_shelf["idxDict"] = self.idxDict
        my_shelf["counter"] = self.counter
        my_shelf.close()

    @staticmethod
    def loadFromFile(filename):
        filename = "data/" + filename + ".out"
        sim = Simulator(autoInitialize=False, verbose=False)

        my_shelf = shelve.open(filename)
        sim.options = my_shelf["options"]
        sim.initialize()

        if sim.options["SARSA"]["type"] == "discrete":
            sim.Sarsa.QValues = np.array(my_shelf["SARSA_QValues"])

        sim.simulationData = my_shelf["simulationData"]
        # sim.runStatistics = my_shelf['runStatistics']
        sim.stateOverTime = np.array(my_shelf["stateOverTime"])
        sim.raycastData = np.array(my_shelf["raycastData"])
        sim.controlInputData = np.array(my_shelf["controlInputData"])
        sim.emptyQValue = np.array(my_shelf["emptyQValue"])
        sim.numTimesteps = my_shelf["numTimesteps"]
        sim.idxDict = my_shelf["idxDict"]
        sim.counter = my_shelf["counter"]

        my_shelf.close()

        return sim
Exemplo n.º 5
0
class Simulator(object):


    def __init__(self, percentObsDensity=20, endTime=40, nonRandomWorld=False,
                 circleRadius=0.7, worldScale=1.0, autoInitialize=True, verbose=True):
        self.verbose = verbose
        self.startSimTime = time.time()
        self.collisionThreshold = 0.4
        self.randomSeed = 5
        self.Sensor_rayLength = 8

        self.percentObsDensity = percentObsDensity
        self.defaultControllerTime = 1000
        self.nonRandomWorld = nonRandomWorld
        self.circleRadius = circleRadius
        self.worldScale = worldScale

        # create the visualizer object
        self.app = ConsoleApp()
        self.view = self.app.createView(useGrid=False)

        self.initializeOptions()
        self.initializeColorMap()
        
        if autoInitialize:
            self.initialize()

    def initializeOptions(self):
        self.options = dict()

        self.options['World'] = dict()
        self.options['World']['obstaclesInnerFraction'] = 0.98
        self.options['World']['randomSeed'] = 40
        self.options['World']['percentObsDensity'] = 10
        self.options['World']['nonRandomWorld'] = True
        self.options['World']['circleRadius'] = 1.0
        self.options['World']['scale'] = 1

        self.options['Sensor'] = dict()
        self.options['Sensor']['rayLength'] = 20
        self.options['Sensor']['numRays'] = 21


        self.options['Car'] = dict()
        self.options['Car']['velocity'] = 4.0

        self.options['dt'] = 0.05

        self.options['runTime'] = dict()
        self.options['runTime']['defaultControllerTime'] = 100


    def setDefaultOptions(self):

        defaultOptions = dict()


        defaultOptions['World'] = dict()
        defaultOptions['World']['obstaclesInnerFraction'] = 0.98
        defaultOptions['World']['randomSeed'] = 40
        defaultOptions['World']['percentObsDensity'] = 30
        defaultOptions['World']['nonRandomWorld'] = True
        defaultOptions['World']['circleRadius'] = 1.75
        defaultOptions['World']['scale'] = 2.5


        defaultOptions['Sensor'] = dict()
        defaultOptions['Sensor']['rayLength'] = 20
        defaultOptions['Sensor']['numRays'] = 41


        defaultOptions['Car'] = dict()
        defaultOptions['Car']['velocity'] = 20

        defaultOptions['dt'] = 0.05


        defaultOptions['runTime'] = dict()
        defaultOptions['runTime']['defaultControllerTime'] = 100


        for k in defaultOptions:
            self.options.setdefault(k, defaultOptions[k])


        for k in defaultOptions:
            if not isinstance(defaultOptions[k], dict):
                continue

            for j in defaultOptions[k]:
                self.options[k].setdefault(j, defaultOptions[k][j])


    def initializeColorMap(self):
        self.colorMap = dict()
        self.colorMap['default'] = [0,1,0]

    def initialize(self):

        self.dt = self.options['dt']
        self.controllerTypeOrder = ['default']

        self.setDefaultOptions()

        self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'],
                                numRays=self.options['Sensor']['numRays'])

        self.SensorApproximator = SensorApproximatorObj(numRays=self.options['Sensor']['numRays'], circleRadius=self.options['World']['circleRadius'], )

        self.Controller = ControllerObj(self.Sensor, self.SensorApproximator)

        self.Car = CarPlant(controller=self.Controller,
                            velocity=self.options['Car']['velocity'])

        self.Controller.initializeVelocity(self.Car.v)



        # create the things needed for simulation
        om.removeFromObjectModel(om.findObjectByName('world'))
        self.world = World.buildLineSegmentTestWorld(percentObsDensity=self.options['World']['percentObsDensity'],
                                            circleRadius=self.options['World']['circleRadius'],
                                            nonRandom=self.options['World']['nonRandomWorld'],
                                            scale=self.options['World']['scale'],
                                            randomSeed=self.options['World']['randomSeed'],
                                            obstaclesInnerFraction=self.options['World']['obstaclesInnerFraction'])

        om.removeFromObjectModel(om.findObjectByName('robot'))
        self.robot, self.frame = World.buildRobot()
        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        self.Sensor.setLocator(self.locator)
        self.frame = self.robot.getChildFrame()
        self.frame.setProperty('Scale', 3)
        #self.frame.setProperty('Visible', False)
        #self.frame.setProperty('Edit', True)
        self.frame.widget.HandleRotationEnabledOff()
        rep = self.frame.widget.GetRepresentation()
        rep.SetTranslateAxisEnabled(2, False)
        rep.SetRotateAxisEnabled(0, False)
        rep.SetRotateAxisEnabled(1, False)

        self.defaultControllerTime = self.options['runTime']['defaultControllerTime']

        self.Car.setFrame(self.frame)
        print "Finished initialization"


    def runSingleSimulation(self, controllerType='default', simulationCutoff=None):


        #self.setRandomCollisionFreeInitialState()
        self.setInitialStateAtZero()

        currentCarState = np.copy(self.Car.state)
        nextCarState = np.copy(self.Car.state)
        self.setRobotFrameState(currentCarState[0], currentCarState[1], currentCarState[2])
        
        firstRaycast = self.Sensor.raycastAll(self.frame)
        firstRaycastLocations = self.Sensor.raycastAllLocations(self.frame)

        self.LineSegmentWorld = World.buildLineSegmentWorld(firstRaycastLocations)
        self.LineSegmentLocator = World.buildCellLocator(self.LineSegmentWorld.visObj.polyData)
        self.Sensor.setLocator(self.LineSegmentLocator)

        nextRaycast = np.zeros(self.Sensor.numRays)

        # record the reward data
        runData = dict()
        startIdx = self.counter

        thisRunIndex = 0
        NMaxSteps = 100

        while (self.counter < self.numTimesteps - 1):
            thisRunIndex += 1
            idx = self.counter
            currentTime = self.t[idx]
            self.stateOverTime[idx,:] = currentCarState
            x = self.stateOverTime[idx,0]
            y = self.stateOverTime[idx,1]
            theta = self.stateOverTime[idx,2]
            self.setRobotFrameState(x,y,theta)
            # self.setRobotState(currentCarState[0], currentCarState[1], currentCarState[2])
            currentRaycast = self.Sensor.raycastAll(self.frame)
            print "current Raycast ", currentRaycast
            self.raycastData[idx,:] = currentRaycast
            S_current = (currentCarState, currentRaycast)


            if controllerType not in self.colorMap.keys():
                print
                raise ValueError("controller of type " + controllerType + " not supported")


            if controllerType in ["default", "defaultRandom"]:
                controlInput, controlInputIdx = self.Controller.computeControlInput(currentCarState,
                                                                            currentTime, self.frame,
                                                                            raycastDistance=currentRaycast,
                                                                            randomize=False)

            self.controlInputData[idx] = controlInput

            nextCarState = self.Car.simulateOneStep(controlInput=controlInput, dt=self.dt)

        
            x = nextCarState[0]
            y = nextCarState[1]
            theta = nextCarState[2]
            self.setRobotFrameState(x,y,theta)
            nextRaycast = self.Sensor.raycastAll(self.frame)


            # Compute the next control input
            S_next = (nextCarState, nextRaycast)

            if controllerType in ["default", "defaultRandom"]:
                nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(nextCarState,
                                                                            currentTime, self.frame,
                                                                            raycastDistance=firstRaycast,
                                                                            randomize=False)


            #bookkeeping
            currentCarState = nextCarState
            currentRaycast = nextRaycast
            self.counter+=1

            # break if we are in collision
            if self.checkInCollision(nextRaycast):
                if self.verbose: print "Had a collision, terminating simulation"
                break

            if thisRunIndex > NMaxSteps:
                print "was safe during N steps"
                break

            if self.counter >= simulationCutoff:
                break


        # fill in the last state by hand
        self.stateOverTime[self.counter,:] = currentCarState
        self.raycastData[self.counter,:] = currentRaycast


        # this just makes sure we don't get stuck in an infinite loop.
        if startIdx == self.counter:
            self.counter += 1

        return runData

    def setNumpyRandomSeed(self, seed=1):
        np.random.seed(seed)

    def runBatchSimulation(self, endTime=None, dt=0.05):

        # for use in playback
        self.dt = self.options['dt']

        self.endTime = self.defaultControllerTime # used to be the sum of the other times as well

        self.t = np.arange(0.0, self.endTime, dt)
        maxNumTimesteps = np.size(self.t)
        self.stateOverTime = np.zeros((maxNumTimesteps+1, 3))
        self.raycastData = np.zeros((maxNumTimesteps+1, self.Sensor.numRays))
        self.controlInputData = np.zeros(maxNumTimesteps+1)
        self.numTimesteps = maxNumTimesteps

        self.controllerTypeOrder = ['default']
        self.counter = 0
        self.simulationData = []
    
        self.initializeStatusBar()

        self.idxDict = dict()
        numRunsCounter = 0


        self.idxDict['default'] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.defaultControllerTime/dt, self.numTimesteps)
        
        while ((self.counter - loopStartIdx < self.defaultControllerTime/dt) and self.counter < self.numTimesteps-1):
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(controllerType='default',
                                               simulationCutoff=simCutoff)
            runData['startIdx'] = startIdx
            runData['controllerType'] = "default"
            runData['duration'] = self.counter - runData['startIdx']
            runData['endIdx'] = self.counter
            runData['runNumber'] = numRunsCounter
            numRunsCounter+=1
            self.simulationData.append(runData)

        # BOOKKEEPING
        # truncate stateOverTime, raycastData, controlInputs to be the correct size
        self.numTimesteps = self.counter + 1
        self.stateOverTime = self.stateOverTime[0:self.counter+1, :]
        self.raycastData = self.raycastData[0:self.counter+1, :]
        self.controlInputData = self.controlInputData[0:self.counter+1]
        self.endTime = 1.0*self.counter/self.numTimesteps*self.endTime



    def initializeStatusBar(self):
        self.numTicks = 10
        self.nextTickComplete = 1.0 / float(self.numTicks)
        self.nextTickIdx = 1
        print "Simulation percentage complete: (", self.numTicks, " # is complete)"
    
    def printStatusBar(self):
        fractionDone = float(self.counter) / float(self.numTimesteps)
        if fractionDone > self.nextTickComplete:

            self.nextTickIdx += 1
            self.nextTickComplete += 1.0 / self.numTicks

            timeSoFar = time.time() - self.startSimTime 
            estimatedTimeLeft_sec = (1 - fractionDone) * timeSoFar / fractionDone
            estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0

            print "#" * self.nextTickIdx, "-" * (self.numTicks - self.nextTickIdx), "estimated", estimatedTimeLeft_minutes, "minutes left"


    def setCollisionFreeInitialState(self):
        tol = 5

        while True:
            
            x = 0.0
            y =   -5.0
            theta = 0 #+ np.random.uniform(0,2*np.pi,1)[0] * 0.01
            
            self.Car.setCarState(x,y,theta)
            self.setRobotFrameState(x,y,theta)

            print "In loop"

            if not self.checkInCollision():
                break
                
        return x,y,theta


    
    def setInitialStateAtZero(self):
        
        x = 0.0
        y = 0.0
        theta = 0.0
        
        self.Car.setCarState(x,y,theta)
        self.setRobotFrameState(x,y,theta)

        return x,y,theta



    def setRandomCollisionFreeInitialState(self):
        tol = 5

        while True:
            
            x = np.random.uniform(self.world.Xmin+tol, self.world.Xmax-tol, 1)[0]
            y = np.random.uniform(self.world.Ymin+tol, self.world.Ymax-tol, 1)[0]
            theta = np.random.uniform(0,2*np.pi,1)[0]
            
            self.Car.setCarState(x,y,theta)
            self.setRobotFrameState(x,y,theta)

            if not self.checkInCollision():
                break

        return x,y,theta

    def setupPlayback(self):

        self.timer = TimerCallback(targetFps=30)
        self.timer.callback = self.tick

        playButtonFps = 1.0/self.dt
        print "playButtonFPS", playButtonFps
        self.playTimer = TimerCallback(targetFps=playButtonFps)
        self.playTimer.callback = self.playTimerCallback
        self.sliderMovedByPlayTimer = False

        panel = QtGui.QWidget()
        l = QtGui.QHBoxLayout(panel)

        playButton = QtGui.QPushButton('Play/Pause')
        playButton.connect('clicked()', self.onPlayButton)

        slider = QtGui.QSlider(QtCore.Qt.Horizontal)
        slider.connect('valueChanged(int)', self.onSliderChanged)
        self.sliderMax = self.numTimesteps
        slider.setMaximum(self.sliderMax)
        self.slider = slider

        l.addWidget(playButton)
        l.addWidget(slider)

        w = QtGui.QWidget()
        l = QtGui.QVBoxLayout(w)
        l.addWidget(self.view)
        self.view.orientationMarkerWidget().Off()
        l.addWidget(panel)
        w.showMaximized()

        self.frame.connectFrameModified(self.updateDrawIntersection)
        self.updateDrawIntersection(self.frame)
        
        applogic.resetCamera(viewDirection=[0.2,0,-1])
        self.view.showMaximized()
        self.view.raise_()
        panel = screengrabberpanel.ScreenGrabberPanel(self.view)
        panel.widget.show()

        cameracontrolpanel.CameraControlPanel(self.view).widget.show()

        elapsed = time.time() - self.startSimTime
        simRate = self.counter/elapsed
        print "Total run time", elapsed
        print "Ticks (Hz)", simRate
        print "Number of steps taken", self.counter
        self.app.start()

    def run(self, launchApp=True):
        self.counter = 1
        self.runBatchSimulation()

        if launchApp:
            self.setupPlayback()    

    def updateDrawIntersection(self, frame):

        origin = np.array(frame.transform.GetPosition())
        #print "origin is now at", origin
        d = DebugData()

        sliderIdx = self.slider.value

        controllerType = self.getControllerTypeFromCounter(sliderIdx)
        colorMaxRange = self.colorMap[controllerType]

        for i in xrange(self.Sensor.numRays):
            ray = self.Sensor.rays[:,i]
            rayTransformed = np.array(frame.transform.TransformNormal(ray))
            #print "rayTransformed is", rayTransformed
            intersection = self.Sensor.raycast(self.LineSegmentLocator, origin, origin + rayTransformed*self.Sensor.rayLength)

            if intersection is not None:
                d.addLine(origin, intersection, color=[1,0,0])
            else:
                d.addLine(origin, origin+rayTransformed*self.Sensor.rayLength, color=colorMaxRange)

        vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255')

        #camera = self.view.camera()
        #camera.SetFocalPoint(frame.transform.GetPosition())
        #camera.SetPosition(frame.transform.TransformPoint((-30,0,10)))


    def getControllerTypeFromCounter(self, counter):
        name = self.controllerTypeOrder[0]

        for controllerType in self.controllerTypeOrder[1:]:
            if counter >= self.idxDict[controllerType]:
                name = controllerType

        return name


    def setRobotFrameState(self, x, y, theta):
        t = vtk.vtkTransform()
        t.Translate(x,y,0.0)
        t.RotateZ(np.degrees(theta))
        self.robot.getChildFrame().copyFrame(t)

    # returns true if we are in collision
    def checkInCollision(self, raycastDistance=None):
        if raycastDistance is None:
            self.setRobotFrameState(self.Car.state[0],self.Car.state[1],self.Car.state[2])
            raycastDistance = self.Sensor.raycastAll(self.frame)

        if np.min(raycastDistance) < self.collisionThreshold:
            return True
        else:
            return False

        # if raycastDistance[(len(raycastDistance)+1)/2] < self.collisionThreshold:
        #     return True
        # else:
        #     return False

    def tick(self):
        #print timer.elapsed
        #simulate(t.elapsed)
        x = np.sin(time.time())
        y = np.cos(time.time())
        self.setRobotFrameState(x,y,0.0)
        if (time.time() - self.playTime) > self.endTime:
            self.playTimer.stop()

    def tick2(self):
        newtime = time.time() - self.playTime
        print time.time() - self.playTime
        x = np.sin(newtime)
        y = np.cos(newtime)
        self.setRobotFrameState(x,y,0.0)

    # just increment the slider, stop the timer if we get to the end
    def playTimerCallback(self):
        self.sliderMovedByPlayTimer = True
        currentIdx = self.slider.value
        nextIdx = currentIdx + 1
        self.slider.setSliderPosition(nextIdx)
        if currentIdx >= self.sliderMax:
            print "reached end of tape, stopping playTime"
            self.playTimer.stop()

    def onSliderChanged(self, value):
        if not self.sliderMovedByPlayTimer:
            self.playTimer.stop()
        numSteps = len(self.stateOverTime)
        idx = int(np.floor(numSteps*(1.0*value/self.sliderMax)))
        idx = min(idx, numSteps-1)
        x,y,theta = self.stateOverTime[idx]
        self.setRobotFrameState(x,y,theta)
        self.sliderMovedByPlayTimer = False

    def onPlayButton(self):

        if self.playTimer.isActive():
            self.onPauseButton()
            return

        print 'play'
        self.playTimer.start()
        self.playTime = time.time()

    def onPauseButton(self):
        print 'pause'
        self.playTimer.stop()

    def saveToFile(self, filename):

        # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime

        filename = 'data/' + filename + ".out"
        my_shelf = shelve.open(filename,'n')

        my_shelf['options'] = self.options

        my_shelf['simulationData'] = self.simulationData
        my_shelf['stateOverTime'] = self.stateOverTime
        my_shelf['raycastData'] = self.raycastData
        my_shelf['controlInputData'] = self.controlInputData
        my_shelf['numTimesteps'] = self.numTimesteps
        my_shelf['idxDict'] = self.idxDict
        my_shelf['counter'] = self.counter
        my_shelf.close()


    @staticmethod
    def loadFromFile(filename):
        filename = 'data/' + filename + ".out"
        sim = Simulator(autoInitialize=False, verbose=False)

        my_shelf = shelve.open(filename)
        sim.options = my_shelf['options']
        sim.initialize()

        sim.simulationData = my_shelf['simulationData']
        sim.stateOverTime = np.array(my_shelf['stateOverTime'])
        sim.raycastData = np.array( my_shelf['raycastData'])
        sim.controlInputData = np.array(my_shelf['controlInputData'])
        sim.numTimesteps = my_shelf['numTimesteps']
        sim.idxDict = my_shelf['idxDict']
        sim.counter = my_shelf['counter']

        my_shelf.close()

        return sim
Exemplo n.º 6
0
class Simulator(object):
    def __init__(self,
                 percentObsDensity=20,
                 endTime=40,
                 nonRandomWorld=False,
                 circleRadius=0.7,
                 worldScale=1.0,
                 autoInitialize=True,
                 verbose=True):
        self.verbose = verbose
        self.startSimTime = time.time()
        self.collisionThreshold = 1.3
        self.randomSeed = 5
        self.Sensor_rayLength = 8

        self.percentObsDensity = percentObsDensity
        self.defaultControllerTime = 1000
        self.nonRandomWorld = nonRandomWorld
        self.circleRadius = circleRadius
        self.worldScale = worldScale

        # create the visualizer object
        self.app = ConsoleApp()
        self.view = self.app.createView(useGrid=False)

        self.initializeOptions()
        self.initializeColorMap()

        if autoInitialize:
            self.initialize()

    def initializeOptions(self):
        self.options = dict()

        self.options['World'] = dict()
        self.options['World']['obstaclesInnerFraction'] = 0.85
        self.options['World']['randomSeed'] = 40
        self.options['World']['percentObsDensity'] = 7.5
        self.options['World']['nonRandomWorld'] = True
        self.options['World']['circleRadius'] = 1.0
        self.options['World']['scale'] = 1.0

        self.options['Sensor'] = dict()
        self.options['Sensor']['rayLength'] = 10
        self.options['Sensor']['numRays'] = 20

        self.options['Quad'] = dict()
        self.options['Quad']['velocity'] = 18

        self.options['dt'] = 0.05

        self.options['runTime'] = dict()
        self.options['runTime']['defaultControllerTime'] = 10

    def setDefaultOptions(self):

        defaultOptions = dict()

        defaultOptions['World'] = dict()
        defaultOptions['World']['obstaclesInnerFraction'] = 0.85
        defaultOptions['World']['randomSeed'] = 40
        defaultOptions['World']['percentObsDensity'] = 7.5
        defaultOptions['World']['nonRandomWorld'] = True
        defaultOptions['World']['circleRadius'] = 1.75
        defaultOptions['World']['scale'] = 1.0

        defaultOptions['Sensor'] = dict()
        defaultOptions['Sensor']['rayLength'] = 10
        defaultOptions['Sensor']['numRays'] = 20

        defaultOptions['Quad'] = dict()
        defaultOptions['Quad']['velocity'] = 18

        defaultOptions['dt'] = 0.05

        defaultOptions['runTime'] = dict()
        defaultOptions['runTime']['defaultControllerTime'] = 10

        for k in defaultOptions:
            self.options.setdefault(k, defaultOptions[k])

        for k in defaultOptions:
            if not isinstance(defaultOptions[k], dict):
                continue

            for j in defaultOptions[k]:
                self.options[k].setdefault(j, defaultOptions[k][j])

    def initializeColorMap(self):
        self.colorMap = dict()
        self.colorMap['default'] = [0, 1, 0]

    def initialize(self):

        self.dt = self.options['dt']
        self.controllerTypeOrder = ['default']

        self.setDefaultOptions()

        self.Sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'],
                                numRays=self.options['Sensor']['numRays'])

        self.Controller = ControllerObj(self.Sensor)

        self.Quad = QuadPlant(controller=self.Controller,
                              velocity=self.options['Quad']['velocity'])

        # create the things needed for simulation
        om.removeFromObjectModel(om.findObjectByName('world'))
        self.world = World.buildWarehouseWorld(
            percentObsDensity=self.options['World']['percentObsDensity'],
            circleRadius=self.options['World']['circleRadius'],
            nonRandom=self.options['World']['nonRandomWorld'],
            scale=self.options['World']['scale'],
            randomSeed=self.options['World']['randomSeed'],
            obstaclesInnerFraction=self.options['World']
            ['obstaclesInnerFraction'])

        om.removeFromObjectModel(om.findObjectByName('robot'))
        self.robot, self.frame = World.buildRobot()
        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        self.Sensor.setLocator(self.locator)
        self.frame = self.robot.getChildFrame()
        self.frame.setProperty('Scale', 3)
        self.frame.setProperty('Edit', True)
        self.frame.widget.HandleRotationEnabledOff()
        rep = self.frame.widget.GetRepresentation()
        rep.SetTranslateAxisEnabled(2, False)
        rep.SetRotateAxisEnabled(0, False)
        rep.SetRotateAxisEnabled(1, False)

        self.defaultControllerTime = self.options['runTime'][
            'defaultControllerTime']

        self.Quad.setFrame(self.frame)
        print "Finished initialization"

    def runSingleSimulation(self,
                            controllerType='default',
                            simulationCutoff=None):

        self.setCollisionFreeInitialState()

        currentQuadState = np.copy(self.Quad.state)
        nextQuadState = np.copy(self.Quad.state)
        self.setRobotFrameState(currentQuadState[0], currentQuadState[1],
                                currentQuadState[2])
        currentRaycast = self.Sensor.raycastAll(self.frame)
        nextRaycast = np.zeros(self.Sensor.numRays)

        # record the reward data
        runData = dict()
        startIdx = self.counter

        while (self.counter < self.numTimesteps - 1):
            idx = self.counter
            currentTime = self.t[idx]
            self.stateOverTime[idx, :] = currentQuadState
            x = self.stateOverTime[idx, 0]
            y = self.stateOverTime[idx, 1]
            theta = self.stateOverTime[idx, 2]
            self.setRobotFrameState(x, y, theta)
            # self.setRobotState(currentQuadState[0], currentQuadState[1], currentQuadState[2])
            currentRaycast = self.Sensor.raycastAll(self.frame)
            self.raycastData[idx, :] = currentRaycast
            S_current = (currentQuadState, currentRaycast)

            if controllerType not in self.colorMap.keys():
                print
                raise ValueError("controller of type " + controllerType +
                                 " not supported")

            if controllerType in ["default", "defaultRandom"]:
                controlInput, controlInputIdx = self.Controller.computeControlInput(
                    currentQuadState,
                    currentTime,
                    self.frame,
                    raycastDistance=currentRaycast,
                    randomize=False)

            self.controlInputData[idx] = controlInput

            nextQuadState = self.Quad.simulateOneStep(
                controlInput=controlInput, dt=self.dt)

            # want to compute nextRaycast so we can do the SARSA algorithm
            x = nextQuadState[0]
            y = nextQuadState[1]
            theta = nextQuadState[2]
            self.setRobotFrameState(x, y, theta)
            nextRaycast = self.Sensor.raycastAll(self.frame)

            # Compute the next control input
            S_next = (nextQuadState, nextRaycast)

            if controllerType in ["default", "defaultRandom"]:
                nextControlInput, nextControlInputIdx = self.Controller.computeControlInput(
                    nextQuadState,
                    currentTime,
                    self.frame,
                    raycastDistance=nextRaycast,
                    randomize=False)

            #bookkeeping
            currentQuadState = nextQuadState
            currentRaycast = nextRaycast
            self.counter += 1

            # break if we are in collision
            if self.checkInCollision(nextRaycast):
                if self.verbose:
                    print "Had a collision, terminating simulation"
                break

            if self.counter >= simulationCutoff:
                break

        # fill in the last state by hand
        self.stateOverTime[self.counter, :] = currentQuadState
        self.raycastData[self.counter, :] = currentRaycast

        # this just makes sure we don't get stuck in an infinite loop.
        if startIdx == self.counter:
            self.counter += 1

        return runData

    def setNumpyRandomSeed(self, seed=1):
        np.random.seed(seed)

    def runBatchSimulation(self, endTime=None, dt=0.05):

        # for use in playback
        self.dt = self.options['dt']

        self.endTime = self.defaultControllerTime  # used to be the sum of the other times as well

        self.t = np.arange(0.0, self.endTime, dt)
        maxNumTimesteps = np.size(self.t)
        self.stateOverTime = np.zeros((maxNumTimesteps + 1, 3))
        self.raycastData = np.zeros((maxNumTimesteps + 1, self.Sensor.numRays))
        self.controlInputData = np.zeros(maxNumTimesteps + 1)
        self.numTimesteps = maxNumTimesteps

        self.controllerTypeOrder = ['default']
        self.counter = 0
        self.simulationData = []

        self.initializeStatusBar()

        self.idxDict = dict()
        numRunsCounter = 0

        self.idxDict['default'] = self.counter
        loopStartIdx = self.counter
        simCutoff = min(loopStartIdx + self.defaultControllerTime / dt,
                        self.numTimesteps)

        while ((self.counter - loopStartIdx < self.defaultControllerTime / dt)
               and self.counter < self.numTimesteps - 1):
            self.printStatusBar()
            startIdx = self.counter
            runData = self.runSingleSimulation(controllerType='default',
                                               simulationCutoff=simCutoff)
            runData['startIdx'] = startIdx
            runData['controllerType'] = "default"
            runData['duration'] = self.counter - runData['startIdx']
            runData['endIdx'] = self.counter
            runData['runNumber'] = numRunsCounter
            numRunsCounter += 1
            self.simulationData.append(runData)

        # BOOKKEEPING
        # truncate stateOverTime, raycastData, controlInputs to be the correct size
        self.numTimesteps = self.counter + 1
        self.stateOverTime = self.stateOverTime[0:self.counter + 1, :]
        self.raycastData = self.raycastData[0:self.counter + 1, :]
        self.controlInputData = self.controlInputData[0:self.counter + 1]
        self.endTime = 1.0 * self.counter / self.numTimesteps * self.endTime

    def initializeStatusBar(self):
        self.numTicks = 10
        self.nextTickComplete = 1.0 / float(self.numTicks)
        self.nextTickIdx = 1
        print "Simulation percentage complete: (", self.numTicks, " # is complete)"

    def printStatusBar(self):
        fractionDone = float(self.counter) / float(self.numTimesteps)
        if fractionDone > self.nextTickComplete:

            self.nextTickIdx += 1
            self.nextTickComplete += 1.0 / self.numTicks

            timeSoFar = time.time() - self.startSimTime
            estimatedTimeLeft_sec = (1 -
                                     fractionDone) * timeSoFar / fractionDone
            estimatedTimeLeft_minutes = estimatedTimeLeft_sec / 60.0

            print "#" * self.nextTickIdx, "-" * (
                self.numTicks - self.nextTickIdx
            ), "estimated", estimatedTimeLeft_minutes, "minutes left"

    def setCollisionFreeInitialState(self):
        tol = 5

        while True:

            x = 0.0
            y = -5.0
            theta = 0  #+ np.random.uniform(0,2*np.pi,1)[0] * 0.01

            self.Quad.setQuadState(x, y, theta)
            self.setRobotFrameState(x, y, theta)

            print "In loop"

            if not self.checkInCollision():
                break

        return x, y, theta

    def setRandomCollisionFreeInitialState(self):
        tol = 5

        while True:

            x = np.random.uniform(self.world.Xmin + tol, self.world.Xmax - tol,
                                  1)[0]
            y = np.random.uniform(self.world.Ymin + tol, self.world.Ymax - tol,
                                  1)[0]
            theta = np.random.uniform(0, 2 * np.pi, 1)[0]

            self.Quad.setQuadState(x, y, theta)
            self.setRobotFrameState(x, y, theta)

            if not self.checkInCollision():
                break

        return x, y, theta

    def setupPlayback(self):

        self.timer = TimerCallback(targetFps=30)
        self.timer.callback = self.tick

        playButtonFps = 1.0 / self.dt
        print "playButtonFPS", playButtonFps
        self.playTimer = TimerCallback(targetFps=playButtonFps)
        self.playTimer.callback = self.playTimerCallback
        self.sliderMovedByPlayTimer = False

        panel = QtGui.QWidget()
        l = QtGui.QHBoxLayout(panel)

        playButton = QtGui.QPushButton('Play/Pause')
        playButton.connect('clicked()', self.onPlayButton)

        slider = QtGui.QSlider(QtCore.Qt.Horizontal)
        slider.connect('valueChanged(int)', self.onSliderChanged)
        self.sliderMax = self.numTimesteps
        slider.setMaximum(self.sliderMax)
        self.slider = slider

        l.addWidget(playButton)
        l.addWidget(slider)

        w = QtGui.QWidget()
        l = QtGui.QVBoxLayout(w)
        l.addWidget(self.view)
        l.addWidget(panel)
        w.showMaximized()

        self.frame.connectFrameModified(self.updateDrawIntersection)
        self.updateDrawIntersection(self.frame)

        applogic.resetCamera(viewDirection=[0.2, 0, -1])
        self.view.showMaximized()
        self.view.raise_()
        panel = screengrabberpanel.ScreenGrabberPanel(self.view)
        panel.widget.show()

        elapsed = time.time() - self.startSimTime
        simRate = self.counter / elapsed
        print "Total run time", elapsed
        print "Ticks (Hz)", simRate
        print "Number of steps taken", self.counter
        self.app.start()

    def run(self, launchApp=True):
        self.counter = 1
        self.runBatchSimulation()
        # self.Sarsa.plotWeights()

        if launchApp:
            self.setupPlayback()

    def updateDrawIntersection(self, frame):

        origin = np.array(frame.transform.GetPosition())
        #print "origin is now at", origin
        d = DebugData()

        sliderIdx = self.slider.value

        controllerType = self.getControllerTypeFromCounter(sliderIdx)
        colorMaxRange = self.colorMap[controllerType]

        for j in xrange(self.Sensor.numLayers):
            for i in xrange(self.Sensor.numRays):
                ray = self.Sensor.rays[:, i, j]
                rayTransformed = np.array(frame.transform.TransformNormal(ray))
                #print "rayTransformed is", rayTransformed
                intersection = self.Sensor.raycast(
                    self.locator, origin,
                    origin + rayTransformed * self.Sensor.rayLength)

                if intersection is not None:
                    d.addLine(origin, intersection, color=[1, 0, 0])
                else:
                    d.addLine(origin,
                              origin + rayTransformed * self.Sensor.rayLength,
                              color=colorMaxRange)

        vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255')

        #camera = self.view.camera()
        #camera.SetFocalPoint(frame.transform.GetPosition())
        #camera.SetPosition(frame.transform.TransformPoint((-30,0,10)))

    def getControllerTypeFromCounter(self, counter):
        name = self.controllerTypeOrder[0]

        for controllerType in self.controllerTypeOrder[1:]:
            if counter >= self.idxDict[controllerType]:
                name = controllerType

        return name

    def setRobotFrameState(self, x, y, theta):
        t = vtk.vtkTransform()
        t.Translate(x, y, 0.0)
        t.RotateZ(np.degrees(theta))
        self.robot.getChildFrame().copyFrame(t)

    # returns true if we are in collision
    def checkInCollision(self, raycastDistance=None):
        if raycastDistance is None:
            self.setRobotFrameState(self.Quad.state[0], self.Quad.state[1],
                                    self.Quad.state[2])
            raycastDistance = self.Sensor.raycastAll(self.frame)

        if np.min(raycastDistance) < self.collisionThreshold:
            return True
        else:
            return False

    def tick(self):
        #print timer.elapsed
        #simulate(t.elapsed)
        x = np.sin(time.time())
        y = np.cos(time.time())
        self.setRobotFrameState(x, y, 0.0)
        if (time.time() - self.playTime) > self.endTime:
            self.playTimer.stop()

    def tick2(self):
        newtime = time.time() - self.playTime
        print time.time() - self.playTime
        x = np.sin(newtime)
        y = np.cos(newtime)
        self.setRobotFrameState(x, y, 0.0)

    # just increment the slider, stop the timer if we get to the end
    def playTimerCallback(self):
        self.sliderMovedByPlayTimer = True
        currentIdx = self.slider.value
        nextIdx = currentIdx + 1
        self.slider.setSliderPosition(nextIdx)
        if currentIdx >= self.sliderMax:
            print "reached end of tape, stopping playTime"
            self.playTimer.stop()

    def onSliderChanged(self, value):
        if not self.sliderMovedByPlayTimer:
            self.playTimer.stop()
        numSteps = len(self.stateOverTime)
        idx = int(np.floor(numSteps * (1.0 * value / self.sliderMax)))
        idx = min(idx, numSteps - 1)
        x, y, theta = self.stateOverTime[idx]
        self.setRobotFrameState(x, y, theta)
        self.sliderMovedByPlayTimer = False

    def onPlayButton(self):

        if self.playTimer.isActive():
            self.onPauseButton()
            return

        print 'play'
        self.playTimer.start()
        self.playTime = time.time()

    def onPauseButton(self):
        print 'pause'
        self.playTimer.stop()

    def saveToFile(self, filename):

        # should also save the run data if it is available, i.e. stateOverTime, rewardOverTime

        filename = 'data/' + filename + ".out"
        my_shelf = shelve.open(filename, 'n')

        my_shelf['options'] = self.options

        my_shelf['simulationData'] = self.simulationData
        my_shelf['stateOverTime'] = self.stateOverTime
        my_shelf['raycastData'] = self.raycastData
        my_shelf['controlInputData'] = self.controlInputData
        my_shelf['numTimesteps'] = self.numTimesteps
        my_shelf['idxDict'] = self.idxDict
        my_shelf['counter'] = self.counter
        my_shelf.close()

    @staticmethod
    def loadFromFile(filename):
        filename = 'data/' + filename + ".out"
        sim = Simulator(autoInitialize=False, verbose=False)

        my_shelf = shelve.open(filename)
        sim.options = my_shelf['options']
        sim.initialize()

        sim.simulationData = my_shelf['simulationData']
        sim.stateOverTime = np.array(my_shelf['stateOverTime'])
        sim.raycastData = np.array(my_shelf['raycastData'])
        sim.controlInputData = np.array(my_shelf['controlInputData'])
        sim.numTimesteps = my_shelf['numTimesteps']
        sim.idxDict = my_shelf['idxDict']
        sim.counter = my_shelf['counter']

        my_shelf.close()

        return sim
Exemplo n.º 7
0
class SimpleSimulator(object):

    def __init__(self):
        self.setDefaultOptions()
        self.initialize()

    def setDefaultOptions(self):
        defaultOptions = dict()
        defaultOptions['Sensor'] = dict()
        defaultOptions['Sensor']['rayLength'] = 20
        defaultOptions['Sensor']['numRays'] = 100
        defaultOptions['Sensor']['FOV'] = 270

        defaultOptions['Sim'] = dict()
        defaultOptions['Sim']['dt'] = 0.05
        defaultOptions['Sim']['usePursuitController'] = True

        defaultOptions['featureDetector'] = {}
        defaultOptions['featureDetector']['detectCorners'] = True
        self.options = defaultOptions

        self.app = ConsoleApp()
        self.view = self.app.createView(useGrid=False)

    def initialize(self):
        self.car = DubinsCar()

        self.sensor = SensorObj(rayLength=self.options['Sensor']['rayLength'],
                                numRays=self.options['Sensor']['numRays'],
                                FOV=self.options['Sensor']['FOV'])


        self.controller = DubinsPIDController(self.sensor, self.car)
        self.world = World.buildDonutWorld()
        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        self.sensor.setLocator(self.locator)
        self.robot, self.carFrame = World.buildRobot()
        self.car.setFrame(self.carFrame)
        self.locator = World.buildCellLocator(self.world.visObj.polyData)
        self.wallDetector = WallDetector(self.sensor)
        self.planner = SimplePlanner(self.sensor)
        self.pursuitController = PurePursuitController(self.car, self.sensor)
        self.featureDetector = FeatureDetector(self.sensor)

    def updateDrawIntersection(self, frame):

        origin = np.array(frame.transform.GetPosition())
        #print "origin is now at", origin
        d = DebugData()

        colorMaxRange = [0,1,0]

        for i in xrange(self.sensor.numRays):
            ray = self.sensor.rays[:,i]
            rayTransformed = np.array(frame.transform.TransformNormal(ray))
            #print "rayTransformed is", rayTransformed
            intersection = self.sensor.raycast(self.locator, origin, origin + rayTransformed*self.sensor.rayLength)

            if intersection is not None:
                d.addLine(origin, intersection, color=[1,0,0])
            else:
                d.addLine(origin, origin+rayTransformed*self.sensor.rayLength, color=colorMaxRange)

        vis.updatePolyData(d.getPolyData(), 'rays', colorByName='RGB255')


    def playTimerCallback(self):
        self.simulateOneStep(self.options['Sim']['dt'])

    def simulateOneStep(self, dt):


        verbose = True
        raycastData = self.sensor.raycastAllFromCurrentFrameLocation()

        if self.options['Sim']['usePursuitController']:
            L_fw, theta = self.planner.computePursuitPoint(raycastData, plot=True)
            carState = self.car.state
            controlInput = self.pursuitController.computeControlInput(carState, dt, L_fw, theta)

        if self.options['featureDetector']['detectCorners']:
            self.featureDetector.detectCorner(raycastData, plot=True, verbose=True)

        newState = self.car.simulateOneStep(controlInput, dt=dt)
        self.car.setFrameToState()

    def onPlayButton(self):

        if self.playTimer.isActive():
            self.playTimer.stop()
            return

        self.car.resetStateToFrameLocation()
        self.playTimer.start()

    def launchViewer(self):

        playButtonFps = 1.0/self.options['Sim']['dt']

        print "playButtonFPS", playButtonFps
        self.playTimer = TimerCallback(targetFps=playButtonFps)
        self.playTimer.callback = self.playTimerCallback

        panel = QtGui.QWidget()
        l = QtGui.QHBoxLayout(panel)

        playButton = QtGui.QPushButton('Play/Pause')
        playButton.connect('clicked()', self.onPlayButton)

        l.addWidget(playButton)

        w = QtGui.QWidget()
        l = QtGui.QVBoxLayout(w)
        l.addWidget(self.view)
        l.addWidget(panel)
        w.showMaximized()


        self.carFrame.connectFrameModified(self.updateDrawIntersection)
        self.updateDrawIntersection(self.carFrame)

        applogic.resetCamera(viewDirection=[0.2,0,-1])
        self.view.showMaximized()
        self.view.raise_()

        self.app.start()

    def testCrossTrackError(self):
        carState = np.copy(self.car.state)
        carState[self.car.idx['v']] = 5.0
        raycastData = self.sensor.raycastAllFromCurrentFrameLocation()
        wallsFound = self.wallDetector.findWalls(raycastData, addNoise=False)

        return self.controller.computeCrossTrackError(carState, wallsFound, verbose=True)

    def testPursuitController(self):
        L_fw, theta = self.planner.testFromCurrent()
        self.car.resetStateToFrameLocation()
        v_des = 5.0
        carState = self.car.state
        dt = 0.05
        u = self.pursuitController.computeControlInput(carState, dt, L_fw, theta, v_des, verbose=True)

    def testDetectFeature(self):
        raycastData = self.sensor.raycastAllFromCurrentFrameLocation()
        self.featureDetector.detectCorner(raycastData, plot=True, verbose=True)