Example #1
File: model.py Project: mekruthi/mmlf
 def _jointStateAction(self, state, action):
     """ Create a joint state-action pseudo-state """
     dimensions = [dimension for dimension in state.dimensions]
     actionDimension = copy.deepcopy(
         self.actionSpace.getDimensions()[0])  # there is per assert only 1
     dimensions.append(actionDimension)
     stateAction = State(numpy.hstack((state, action)), dimensions)
     stateAction.scale()
     return stateAction
Example #2
File: spaces.py Project: mekruthi/mmlf
    def getStateList(self):
        """ Returns a list of all possible states.
        
        Even if this state space has more than one dimension, it returns a one
        dimensional list that contains all possible states.        
        
        This is achieved by creating the crossproduct of the values of
        all dimensions. It requires that all dimensions are discrete.
        """
        # Check that all dimensions are discrete
        for dimension in self.getDimensions():
            assert dimension.isDiscrete(), \
                   "State list are available only for discrete state spaces!"

        # Create cross product of all possible states
        crossProduct = lambda ss, row=[], level=0: \
                            len(ss)>1 \
                            and reduce(lambda x,y:x+y,[crossProduct(ss[1:],row+[i],level+1) for i in ss[0]]) \
                            or [row+[i] for i in ss[0]]

        listOfStateDimensionValues = [
            dimension.getValues() for dimension in self.getDimensions()
        ]

        # Return crossproduct of states
        return map(lambda value: State(value, self.getDimensions()),
                   crossProduct(listOfStateDimensionValues))
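A minimal stand-alone sketch of the same cross-product step, using itertools.product instead of the recursive lambda; the dimension values below are made-up placeholders, not taken from mmlf.

from itertools import product

# Hypothetical discrete dimension values (placeholders)
listOfStateDimensionValues = [[0, 1, 2], ["left", "right"]]

# Each combination of one value per dimension corresponds to one state
allStates = [list(combination)
             for combination in product(*listOfStateDimensionValues)]
print(allStates)  # [[0, 'left'], [0, 'right'], [1, 'left'], ...]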
Example #3
File: knn.py Project: mekruthi/mmlf
 def computeQ(self, state, action):
     """ Computes the Q-value of the given state-action pair
     
     The Q-value of the query state-action pair is computed as a weighted linear
     combination of the Q-values of the *k* nearest neighbors, where the weighting
     is based on the distance between the respective state and the query state.
     """
     if not action in self.actionsKDTree \
         or self.actionsKDTree[action] == None:
         return 0.0
    
     k = min(self.k, self.states[action].shape[0])
     indices, distances = self.actionsKDTree[action].knn(state, k)
     
     qValue = 0.0
     denominator = 0.0
     for index, distance in zip(indices[0], distances[0]):
         neighbor = State(self.states[action][index],
                          state.dimensions)
         neighborsQValue = self.qValues[(neighbor, action)]
         
         weight = gaussian(distance, self.b_X)
         qValue += weight * neighborsQValue
         denominator += weight
     
     return qValue / denominator
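A minimal sketch of the distance-weighted averaging used above; the gaussian helper mirrors the exp(-dist/b**2) kernel assumed from the snippet, and the neighbor Q-values, distances and bandwidth b_X are made-up numbers.

import numpy

def gaussian(distance, bandwidth):
    # Unnormalized Gaussian-style kernel, as assumed from the snippet above
    return numpy.exp(-distance / bandwidth ** 2)

neighborQValues = numpy.array([1.0, 0.5, -0.2])  # Q-values of the k neighbors
distances = numpy.array([0.1, 0.4, 0.9])         # distances to the query state
b_X = 0.5                                        # kernel bandwidth

weights = gaussian(distances, b_X)
qValue = numpy.sum(weights * neighborQValues) / numpy.sum(weights)
print(qValue)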
Example #4
    def getNearestNeighbors(self, state, k, b):
        """ Determines *k* most similar states to the given *state*
        
        Determines the *k* most similar states to the given *state*. Returns an
        iterator over (weight, neighbor) pairs, where weight is the Gaussian-weighted
        influence of the neighbor on *state*. The weight is computed via
        exp(-dist/b**2) / sum_over_neighbors(exp(-dist_i/b**2)).
        Note that the weights sum to 1.
        """
        if self.states is not None:
            k = min(k, self.states.shape[1])

            if hasattr(self,
                       "kdTree"):  # if we can use approximate nearest neighbor
                indices, distances = self.kdTree.knn(state, k=k)

                # Compute weights based on distance
                weights = numpy.exp(-distances[0] / (b**2))
                denominator = numpy.sum(weights)

                # If the distances become too large, then all values can become zero
                # In this situation, we simply return the closest state and probability 1.
                if denominator == 0:
                    import warnings
                    warnings.warn(
                        "Too large distances, returning only closest example")
                    indices[0] = [indices[0][0]]
                    weights[0] = 1.0
                else:
                    # Normalize weights
                    weights = weights / denominator

                for index, weight in zip(indices[0], weights):
                    yield weight, State(self.states.T[index], state.dimensions)
            else:
                assert k == 1
                minDist = numpy.inf
                closestSample = None
                for index in range(self.states.shape[1]):
                    sampleState = self.states.T[index]
                    dist = numpy.linalg.norm(state - sampleState)
                    if dist < minDist:
                        minDist = dist
                        closestSample = sampleState
                yield 1.0, State(closestSample, state.dimensions)
        else:
            raise ModelNotInitialized("No state samples available")
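A sketch of the weight normalization described in the docstring, w_i = exp(-d_i/b**2) / sum_j exp(-d_j/b**2), including the fallback to the closest neighbor when all weights underflow to zero; the distances and the bandwidth b are placeholder values.

import numpy

distances = numpy.array([0.2, 0.5, 3.0])  # sorted ascending, as knn returns them
b = 0.4

weights = numpy.exp(-distances / b ** 2)
denominator = numpy.sum(weights)
if denominator == 0:
    # All weights underflowed: keep only the closest neighbor with weight 1
    weights = numpy.zeros_like(weights)
    weights[0] = 1.0
else:
    weights = weights / denominator
print(weights, weights.sum())  # the weights sum to 1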
Example #5
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given *state*. 
        
        Returns an iterator that yields pairs of states and
        their probabilities of being the successor of the given *state*. 
        """
        if self.states == None:
            raise ModelNotInitialized()

        k = min(self.states.shape[0], self.k)

        if self.rebuildSucc:
            self.succKDTree = ann.kdtree(self.states)
            self.rebuildSucc = False

        indices, distances = self.succKDTree.knn(state, k)

        denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa**2)))

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and probability 1.
        if denominator == 0 or numpy.isnan(denominator):
            import warnings
            warnings.warn(
                "Too large distances, returning only closest example")
            indices[0] = [indices[0][0]]
            distances[0] = [0.0]
            denominator = numpy.exp(0.0 / (self.b_Sa**2))

        for index, distance in zip(indices[0], distances[0]):
            neighbor = State(
                self.states[index],
                state.dimensions)  # TODO: not use state.dimensions
            succState, reward = self.successorSamples[neighbor]

            delta = succState - neighbor
            predictedSuccState = State(state + delta, state.dimensions)

            if not 0 <= gaussian(distance, self.b_Sa) / denominator <= 1:
                import warnings
                import sys
                warnings.warn("Invalid distances in KNN Model!")
                print distances
                sys.exit(0)

            yield predictedSuccState, gaussian(distance,
                                               self.b_Sa) / denominator
Example #6
File: cmac.py Project: mekruthi/mmlf
 def getTile(self, state):
     """ Compute the activated tile for the given state_value """
     if state in self.stateToTileCache:
         return self.stateToTileCache[state]
     else:
         scaledState = State(state, copy.copy(
             state.dimensions))  # avoid side-effects
         scaledState.scale(0, 1)
         tile = tuple(
             numpy.round((numpy.array(scaledState) + self.offset) *
                         self.tilesPerDimension).astype(numpy.int))
         self.stateToTileCache[state] = tile
         self.recentStatesOrder.appendleft(state)
         if len(self.recentStatesOrder) > 50:
             oldestState = self.recentStatesOrder.pop()
             self.stateToTileCache.pop(oldestState)
         return tile
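A stand-alone sketch of the tile computation above, assuming the state has already been scaled to [0, 1] per dimension; offset and tilesPerDimension are illustrative values, not the mmlf defaults.

import numpy

scaledState = numpy.array([0.37, 0.81])    # state scaled to [0, 1]
offset = numpy.array([0.0, 0.05])          # per-dimension tiling offset
tilesPerDimension = numpy.array([10, 10])  # resolution of the tiling

tile = tuple(numpy.round((scaledState + offset) * tilesPerDimension).astype(int))
print(tile)  # two integer tile indices, here (4, 9)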
Example #7
 def sampleState(self):
     """ Return a state drawn randomly """
     stateDensity = self.exampleSet.getStateDensity()
     if stateDensity != None:
         # TODO: Does it make sense to sample based on the data set?
         return State(stateDensity.resample(1).T[0])
     else:
         raise ModelNotInitialized()
Example #8
File: spaces.py Project: mekruthi/mmlf
    def parseStateDict(self, stateDict):
        #Check whether the given state dict is a valid one
        assert self._isValidState(
            stateDict), "State %s is invalid!" % stateDict

        state = State([stateDict[key] for key in sorted(stateDict.keys())],
                      map(lambda name: self[name], sorted(stateDict.keys())))

        return state
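A small sketch of the ordering used above: the state vector is assembled from the dict values in sorted key order, so its layout is deterministic regardless of insertion order. The dimension names are placeholders.

stateDict = {"velocity": 0.02, "position": -0.5}
stateVector = [stateDict[key] for key in sorted(stateDict.keys())]
print(stateVector)  # [-0.5, 0.02] -- "position" sorts before "velocity"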
Example #9
 def getStates(self):
     """ Return all states contained in this example set """
     if self.states is not None and self.states.shape[1] >= 1:
         return [
             State(self.states[:, i], self.stateDimensions)
             for i in range(self.states.shape[1])
         ]
     else:
         return []
Example #10
 def sampleSuccessorState(self, state):
     """ Return a state drawn from the state's successor distribution """
     if self._retrainingRequired():
         self._updateModel()
     if self.succStateModel != None:
         return State(state + self.succStateModel.predict(state),
                      state.dimensions)
     else:
         raise ModelNotInitialized()
Example #11
    def redraw(self):
        # Update policy visualization
        for arrow in self.arrowInstances:
            arrow.remove()
        self.arrowInstances = []
        # Iterate over all states and compute the value of the observed function
        dimensions = [
            self.stateSpace[dimName] for dimName in ["column", "row"]
        ]
        states = [
            State((column, row), dimensions)  #
            for column in range(self.maze.getColumns())
            for row in range(self.maze.getRows())
        ]
        for state in states:
            # Evaluate function for this state
            actionValues = dict(
                (action, self.valueAccessFunction(state, (action, )) if self.
                 valueAccessFunction is not None else 0.0)
                for action in ["up", "down", "left", "right"])
            maxValue = max(actionValues.values())
            axis = self.figPolicy.gca()
            for action in actionValues.keys():
                if actionValues[action] == maxValue:
                    self._plotArrow(axis, (state[0], state[1]), action)

        # Update Q-function visualization
        for state in states:
            for action in ["up", "down", "left", "right"]:
                value =  self.valueAccessFunction(state, (action,)) \
                                if self.valueAccessFunction is not None else 0.0
                if int(value) == value:
                    valueString = "%s\n%s" % (int(value),
                                              self.samples[(state, action)])
                else:
                    valueString = "%.1f\n%s" % (value, self.samples[(state,
                                                                     action)])
                if (state, action) not in self.textInstances.keys():
                    if isinstance(
                            action, tuple
                    ):  # For TD-Agents that use crossproduct of action space
                        axis = self.figValueFunction[action[0]].gca()
                    else:
                        axis = self.figValueFunction[action].gca()
                    textInstance = \
                        axis.text(state[0] - 0.3, state[1], valueString, fontsize=8)
                    self.textInstances[(state, action)] = textInstance
                else:
                    self.textInstances[(state, action)].set_text(valueString)

        self.canvasPolicy.draw()
        for index, action in enumerate(self.actions):
            self.canvasValueFunction[action].draw()
Example #12
def boundState(state):
    """ Return the given state with each dimension bounded to [-0.1, 1.1] """
    assert type(state) == State
    dimensions = state.dimensions
    # change to numpy array since where operator does not work on States
    state = numpy.array(state)  # Create a copy
    state[numpy.where(state < 0.0)] = 0.0
    state[numpy.where(state > 1.0)] = 1.0
    # Change back to State
    state = State(state, dimensions)

    return state
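The same bounding can be written with numpy.clip; this sketch drops the mmlf State wrapper and works on a plain array.

import numpy

state = numpy.array([-0.3, 0.5, 1.7])
boundedState = numpy.clip(state, 0.0, 1.0)  # clip every dimension to [0, 1]
print(boundedState)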
Example #13
    def evaluate(self, state):
        """ Evaluates the policy for the given state """
        # If bias is desired, we simply append an additional dimension that
        # always takes the value 1
        if self.bias:
            dimensions = [dimension for dimension in state.dimensions]
            biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
            dimensions.append(biasDimension)
            input = State(numpy.hstack((state, [1])), dimensions)
        else:  # Just create a copy of the state
            input = State(state, state.dimensions)

        # Scale state dimensions to range  (-1, 1)
        input.scale(-1, 1)

        # Compute the activation (the preference of the policy) for each action
        # The last action always has activation 0 (this removes redundant
        # representations of the same policy)
        actionActivations = []
        for actionIndex in range(self.numActions - 1):
            activation = numpy.dot(
                self.weights[self.inputDims * actionIndex:self.inputDims *
                             (actionIndex + 1)], input)
            actionActivations.append(activation)
        actionActivations.append(0.0)

        # Greedy action selection
        selectedAction = max(
            zip(actionActivations, range(len(actionActivations))))[1]

        return self.actions[selectedAction]
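A minimal sketch of the greedy linear policy above: one weight block per action except the last, whose activation is fixed to 0, followed by an argmax over the activations. The weights and the input vector are random placeholders.

import numpy

numActions, inputDims = 3, 4
weights = numpy.random.randn((numActions - 1) * inputDims)  # flat weight vector
inputVector = numpy.random.rand(inputDims)                  # scaled state (plus bias)

activations = [numpy.dot(weights[i * inputDims:(i + 1) * inputDims], inputVector)
               for i in range(numActions - 1)]
activations.append(0.0)  # the last action serves as the fixed reference
selectedAction = int(numpy.argmax(activations))
print(selectedAction)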
Example #14
    def evaluate(self, state):
        """ Evaluates the policy for the given state """
        # If bias is desired, we simply append an additional dimension that
        # always takes the value 1
        if self.bias:
            dimensions = [dimension for dimension in state.dimensions]
            biasDimension = Dimension("zzz_bias", "continuous", [[0, 1]])
            dimensions.append(biasDimension)
            state = State(numpy.hstack((state, [1])), dimensions)

        # Scale state dimensions to range  (-1, 1)
        state.scale(-1, 1)

        # Compute the activation (the preference of the policy) for each action
        output = []
        for outputDimIndex in range(self.numActions):
            activation = numpy.dot(
                self.weights[self.inputDims * outputDimIndex:self.inputDims *
                             (outputDimIndex + 1)], state)
            output.append(activation)

        return output
Example #15
    def getPredecessorDistribution(self, state):
        """ Return a states drawn from *state*'s predecessor distribution 
        
        Returns a possible predecessor state of *state* drawn from the 
        predecessor state distribution according to its probability mass function.
        """
        if self.succStates == None:
            raise ModelNotInitialized()

        k = min(self.states.shape[0], self.k)

        if self.rebuildPred:
            self.predKDTree = ann.kdtree(self.succStates)
            self.rebuildPred = False

        indices, distances = self.predKDTree.knn(state, k)

        denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa**2)))

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and probability 1.
        if denominator == 0:
            import warnings
            warnings.warn("Too large distances, returing only closest example")
            indices[0] = [indices[0][0]]
            distances[0] = [0.0]
            denominator = numpy.exp(0.0 / (self.b_Sa**2))

        for index, distance in zip(indices[0], distances[0]):
            neighbor = State(
                self.succStates[index],
                state.dimensions)  # TODO: not use state.dimensions
            predState, reward = self.predecessorSamples[neighbor]

            delta = predState - neighbor
            predictedPredState = State(state + delta, state.dimensions)

            yield predictedPredState, gaussian(distance,
                                               self.b_Sa) / denominator
Example #16
def generate2dStateSlice(varyDimensions,
                         stateSpace,
                         defaultDimValues,
                         gridNodesPerDim,
                         varyValueRanges=None):
    """ Generate a set of states that form a 2d slice through state space.
    
    The set of states consists of gridNodesPerDim**2 states. Each of them has
    the values given by defaultDimValues, except for the two dimensions passed
    via varyDimensions. In these two dimensions, the value is determined by a
    2d grid that fills [0,1]x[0,1].
    """
    assert len(varyDimensions) == 2, \
        " We need two varyDimensions to create a 2d state space slice."

    assert (sorted(stateSpace.keys()) == sorted(set(defaultDimValues.keys() + varyDimensions))), \
        "Cannot create a 2d state space slice since value definition is not "\
        "consistent with state space definition."

    if varyValueRanges == None:  # Set default value ranges
        varyValueRanges = [
            stateSpace[varyDimensions[0]].getValueRanges()[0],
            stateSpace[varyDimensions[1]].getValueRanges()[0]
        ]

    # Sort state dimensions according to dimension name
    dimensions = [stateSpace[dimName] for dimName in sorted(stateSpace.keys())]
    defaultValue = [
        defaultDimValues[dimName]
        for dimName in sorted(defaultDimValues.keys())
    ]

    # The indices of the dimensions which vary
    varyDimensionIndex1 = sorted(stateSpace.keys()).index(varyDimensions[0])
    varyDimensionIndex2 = sorted(stateSpace.keys()).index(varyDimensions[1])

    # Create the 2d slice
    slice2d = {}
    for i, value1 in enumerate(
            numpy.linspace(varyValueRanges[0][0], varyValueRanges[0][1],
                           gridNodesPerDim)):
        for j, value2 in enumerate(
                numpy.linspace(varyValueRanges[1][0], varyValueRanges[1][1],
                               gridNodesPerDim)):
            # instantiate default value
            defaultValue[varyDimensionIndex1] = value1
            defaultValue[varyDimensionIndex2] = value2
            # Create state object
            slice2d[(i, j)] = State(defaultValue, dimensions)

    return slice2d
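A sketch of the grid construction above: gridNodesPerDim**2 points that vary only in two chosen dimensions while the remaining entries keep their default value; the three-dimensional layout is illustrative, not an mmlf state space.

import numpy

gridNodesPerDim = 3
defaultValue = [0.5, 0.5, 0.5]                   # three state dimensions
varyDimensionIndex1, varyDimensionIndex2 = 0, 2  # the two varying dimensions

slice2d = {}
for i, value1 in enumerate(numpy.linspace(0.0, 1.0, gridNodesPerDim)):
    for j, value2 in enumerate(numpy.linspace(0.0, 1.0, gridNodesPerDim)):
        point = list(defaultValue)               # copy the default values
        point[varyDimensionIndex1] = value1
        point[varyDimensionIndex2] = value2
        slice2d[(i, j)] = point
print(slice2d[(0, 2)])  # [0.0, 0.5, 1.0]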
Example #17
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given state. 
        
        Returns an iterator that yields pairs of grid nodes and
        their probabilities of being the successor of the given state. 
        """
        if self._retrainingRequired():
            self._updateModel()

        if self.succStateModel != None:
            # This is a deterministic model!
            yield (State(state + self.succStateModel.predict(state),
                         state.dimensions), 1.0)
        else:
            raise ModelNotInitialized()
Example #18
    def updateValues(self, valueAccessFunction, actions):
        self.axisValueFunction.clear()
        for action in actions:
            actionValues = []
            for state in sorted(self.states):
                actionValues.append(
                    valueAccessFunction(
                        State([state], self.stateSpace.values()), action))

            self.axisValueFunction.plot(sorted(self.states),
                                        actionValues,
                                        label=str(action))

        self.axisValueFunction.set_xlabel('Sum of cards')
        self.axisValueFunction.set_ylabel('Value')
        self.axisValueFunction.legend()
        self.canvasValueFunction.draw()
Example #19
    def stateTransitionFct(self, state, action):
        """ Returns iterator of the successor states of *action* in *state*."""

        #Applies the action and calculates the new position and velocity
        def minmax(item, limit1, limit2):
            "Bounds item to between limit1 and limit2 (or -limit1)"
            return max(limit1, min(limit2, item))

        # Get position and velocity
        position = state["position"]
        velocity = state["velocity"]

        # Determine acceleration factor
        if action == 'left':  # action is backward thrust
            factor = -1
        elif action == 'none':  # action is coast
            factor = 0
        else:  # action is forward thrust
            factor = 1

        # Do the actual state update
        velocityChange = self.configDict["accelerationFactor"] * factor \
                                - 0.0025 * cos(3 * position)
        velocity = minmax(velocity + velocityChange, -self.maxVelocity,
                          self.maxVelocity)
        position += velocity
        position = minmax(position, self.minPosition, self.maxPosition)

        if (position <= self.minPosition) and (velocity < 0):
            velocity = 0.0

        if position >= self.goalPosition \
                    and abs(velocity) > self.configDict["maxGoalVelocity"]:
            velocity = -velocity

        yield State(
            [position, velocity],
            [self.stateSpace["position"], self.stateSpace["velocity"]]), 1.0
Example #20
    def getExpectedReward(self, state):
        """ Returns the expected reward for the given state """
        if self.states == None:
            return 0.0

        k = min(self.states.shape[0], self.k)

        if self.rebuildSucc:
            self.succKDTree = ann.kdtree(self.states)
            self.rebuildSucc = False

        indices, distances = self.succKDTree.knn(state, k)

        denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa**2)))

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and probability 1.
        if denominator == 0:
            import warnings
            warnings.warn(
                "Too large distances, returning only closest example")
            indices[0] = [indices[0][0]]
            distances[0] = [0.0]
            denominator = numpy.exp(0.0 / (self.b_Sa**2))

        expectedReward = 0.0
        for index, distance in zip(indices[0], distances[0]):
            neighbor = State(
                self.states[index],
                state.dimensions)  # TODO: not use state.dimensions

            succState, reward = self.successorSamples[neighbor]

            weight = gaussian(distance, self.b_Sa) / denominator
            expectedReward += reward * weight

        return expectedReward
Example #21
    def _updateSamples(self, state, action, reward, succState, episodeTerminated):
        # Determine color
        if self.colorCriterion == "Action":
            value = action 
        elif self.colorCriterion == "Reward": 
            value = reward
        elif self.colorCriterion == "Q-Value":
            if self.evalFunction is None: return
            queryState = State((succState['x'], succState['xdot'], 
                                succState['y'], succState['ydot']), 
                               self.dimensions)
            value = self.evalFunction(queryState)
            
            self.minValue = min(value, self.minValue)
            self.maxValue = max(value, self.maxValue)

        if self.drawingEnabledCheckbox.checkState(): # Immediate drawing           
            # Remove ball patch if it is drawn currently
            if self.ballPatch != None:
                self.ballPatch.remove()
                self.ballPatch = None
                
            if self.drawStyle == "Current Position":
                # Remove old trajectory
                self._removeTrajectory()
                self.rememberedSegments = []
                # Plot ball     
                self.ballPatch = Circle([state["x"], state["y"]], 
                                        self.pinballMazeEnv.maze.ballRadius, facecolor='k') 
                self.axis.add_patch(self.ballPatch)
                self.canvas.draw()
            elif self.drawStyle == "Online (All)":   
                # If drawing was just reactivated
                self._drawRememberedSegments()
                # Draw current transition             
                lines = self.axis.plot([state["x"], succState["x"]], 
                                       [state["y"], succState["y"]], '-',
                                       color=self._determineColor(value))
                self.linePatches.extend(lines)
                self.canvas.draw()
            else: # "Last Episode"
                # Remember state trajectory, it will be drawn at the end 
                # of the episode
                self.rememberedSegments.append((state["x"], succState["x"],
                                                state["y"], succState["y"], 
                                                value))
                if episodeTerminated:
                    # Remove last trajectory, draw this episode's trajectory
                    self._removeTrajectory()
                    self._drawRememberedSegments()
                    self.canvas.draw()
                    # When coloring trajectory based on real valued criteria,
                    # we have to update the legend now 
                    if self.colorCriterion == "Q-Value":
                        self.legendWidget.clear()
                        for value in numpy.logspace(0, numpy.log10(self.maxValue - self.minValue + 1), 10):
                            value = value - 1 + self.minValue
                            
                            color = self._determineColor(value)
                            item = QtGui.QListWidgetItem(str(value), self.legendWidget)
                            qColor = QtGui.QColor(int(color[0]*255),
                                                  int(color[1]*255), 
                                                  int(color[2]*255))
                            item.setTextColor(qColor)
                            self.legendWidget.addItem(item) 
        else:
            if self.drawStyle != "Current Position":
                # Remember state trajectory, it will be drawn once drawing is
                # reenabled
                self.rememberedSegments.append((state["x"], succState["x"],
                                                state["y"], succState["y"], 
                                                value))
Example #22
File: model.py Project: mekruthi/mmlf
 def _extractState(self, stateAction):
     """ Extracts the state from the joint state-action pseudo-state """
     dimensions = [dimension for dimension in stateAction.dimensions][:-1]
     state = State(stateAction[:-1], dimensions)
     return state
Example #23
File: model.py Project: mekruthi/mmlf
    def plot(self, ax, stateSpace, plotStateDims, dimValues, plotSamples,
             colorFct, **kwargs):
        # Determine index of plot dimensions
        stateIndex1 = sorted(stateSpace.keys()).index(plotStateDims[0])
        stateIndex2 = sorted(stateSpace.keys()).index(plotStateDims[1])

        xValues = numpy.linspace(0, 1, dimValues[stateIndex1])
        yValues = numpy.linspace(0, 1, dimValues[stateIndex2])

        U = numpy.zeros((len(xValues), len(yValues)))
        V = numpy.zeros((len(xValues), len(yValues)))
        color = numpy.zeros((len(xValues), len(yValues)))
        for i in range(len(xValues)):
            for j in range(len(yValues)):
                numberOfDimensions = stateSpace.getNumberOfDimensions()
                node = numpy.zeros(numberOfDimensions)
                for k in range(numberOfDimensions):
                    if k == stateIndex1:
                        node[k] = xValues[i]
                    elif k == stateIndex2:
                        node[k] = yValues[j]
                    else:
                        node[k] = (dimValues[k] / 2 + 0.5) / dimValues[k]

                node = State(node, [
                    Dimension(
                        sorted(stateSpace.keys())[dimNum], "continuous",
                        [[0, 1]]) for dimNum in range(numberOfDimensions)
                ])

                # Find the maximum likely successor state
                p = 0.0
                maxSuccNode = node
                meanSuccNode = numpy.zeros(len(node))
                for succNode, prob in self.getSuccessorDistribution(node):
                    meanSuccNode += succNode * prob
                    if prob > p:
                        maxSuccNode = succNode
                        p = prob

                U[i, j] = meanSuccNode[stateIndex1] - node[stateIndex1]
                V[i, j] = meanSuccNode[stateIndex2] - node[stateIndex2]
                color[i, j] = colorFct(self, node, meanSuccNode)

        X, Y = numpy.meshgrid(xValues, yValues)
        ax.contourf(Y, X, color, 15)
        #        pylab.colorbar()
        # Decide whether we plot the training samples or the predictions
        if plotSamples:
            ax.scatter(self.states[:, stateIndex1],
                       self.states[:, stateIndex2],
                       marker='o',
                       c='b',
                       s=5)
        else:
            ax.quiver(Y, X, U, V)
        ax.plot(range(0))
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_xlabel(plotStateDims[0])
        ax.set_ylabel(plotStateDims[1])
        ax.set_xticklabels([])
        ax.set_yticklabels([])
Example #24
    def _plotFunction(self):
        if self.evalFunction is None:
            return

        self.lock.acquire()

        # Clean up old plot
        for patch in self.plottedPatches:
            patch.remove()
        self.plottedPatches = []

        self.colorMapping = dict()
        self.colors = cycle(["b", "g", "r", "c", "m", "y"])

        cmap = pylab.get_cmap("jet")

        # Check if the observed function returns discrete or continuous value
        discreteFunction = isinstance(self.functionObservable,
                                      FunctionOverStateSpaceObservable) \
                                and self.functionObservable.discreteValues
        if not discreteFunction:
            # The values of the observed function over the 2d state space
            values = numpy.ma.array(numpy.zeros(
                (self.maze.getColumns(), self.maze.getRows())),
                                    mask=numpy.zeros((self.maze.getColumns(),
                                                      self.maze.getRows())))

        # Iterate over all states and compute the value of the observed function
        dimensions = [
            self.stateSpace[dimName] for dimName in ["column", "row"]
        ]
        for column in range(self.maze.getColumns()):
            for row in range(self.maze.getRows()):
                # Create state object
                state = State((column, row), dimensions)
                # Evaluate function for this state
                if isinstance(self.functionObservable,
                              FunctionOverStateSpaceObservable):
                    functionValue = self.evalFunction(state)
                else:  # StateActionValuesObservable
                    # Determine chosen option first
                    selectedOption = None
                    for option in self.actions:
                        selectedOptionName = str(
                            self.suboptionComboBox.currentText())
                        if str(option) == selectedOptionName:
                            selectedOption = option
                            break
                    assert selectedOption is not None
                    functionValue = self.evalFunction(state, option)

                # Map function value onto color value
                if discreteFunction:
                    # Deal with situations where the function is only defined over
                    # part of the state space
                    if functionValue == None or functionValue in [
                            numpy.nan, numpy.inf, -numpy.inf
                    ]:
                        continue
                    # Determine color value for function value
                    if not functionValue in self.colorMapping:
                        # Choose a color for a function value that occurs for the
                        # first time
                        self.colorMapping[functionValue] = self.colors.next()
                    patch = self.maze.plotSquare(
                        self.axis, (column, row),
                        self.colorMapping[functionValue])
                    self.plottedPatches.append(patch[0])
                else:
                    # Remember values since we have to know the min and max value
                    # before we can plot
                    values[column, row] = functionValue
                    if functionValue == None or functionValue in [
                            numpy.nan, numpy.inf, -numpy.inf
                    ]:
                        values.mask[column, row] = True

        # Do the actual plotting for functions with continuous values
        if not discreteFunction:
            minValue = values.min()
            maxValue = values.max()
            for column in range(self.maze.getColumns()):
                for row in range(self.maze.getRows()):
                    if values.mask[column, row]: continue
                    value = (values[column, row] - minValue) / (maxValue -
                                                                minValue)
                    patch = self.maze.plotSquare(self.axis, (column, row),
                                                 cmap(value),
                                                 zorder=0)
                    self.plottedPatches.append(patch[0])

        # Set limits
        self.axis.set_xlim(0, len(self.maze.structure[0]) - 1)
        self.axis.set_ylim(0, len(self.maze.structure) - 1)

        # Update legend
        self.legendWidget.clear()
        if discreteFunction:
            for functionValue, colorValue in self.colorMapping.items():
                if isinstance(functionValue, tuple):
                    functionValue = functionValue[0]  # deal with '(action,)'
                rgbaColor = matplotlib.colors.ColorConverter().to_rgba(
                    colorValue)
                item = QtGui.QListWidgetItem(str(functionValue),
                                             self.legendWidget)
                color = QtGui.QColor(int(rgbaColor[0] * 255),
                                     int(rgbaColor[1] * 255),
                                     int(rgbaColor[2] * 255))
                item.setTextColor(color)
                self.legendWidget.addItem(item)
        else:
            for value in numpy.linspace(values.min(), values.max(), 10):
                rgbaColor = cmap(
                    (value - values.min()) / (values.max() - values.min()))
                item = QtGui.QListWidgetItem(str(value), self.legendWidget)
                color = QtGui.QColor(int(rgbaColor[0] * 255),
                                     int(rgbaColor[1] * 255),
                                     int(rgbaColor[2] * 255))
                item.setTextColor(color)
                self.legendWidget.addItem(item)

        self.canvas.draw()

        self.lock.release()
Example #25
    def plot(self,
             function,
             actions,
             fig,
             stateSpace,
             plotStateDims=None,
             plotActions=None,
             rasterPoints=100):
        """ Plots the q-Function for the case of a 2-dim subspace of the state space. 
        
        plotStateDims :The 2 dims that should be plotted
        plotActions : The action that should be plotted
        rasterPoints : How many raster points per dimension
        """
        # All actions that should be plotted
        if plotActions == None:
            plotActions = actions
        else:
            # Check if plot actions are valid actions
            for i in range(len(plotActions)):
                if plotActions[i] in actions: continue  # ok...
                try:
                    plotActions[i] = eval(plotActions[i])
                except:
                    raise Exception("Invalid plot action %s" % plotActions[i])

        # Determine the indices of the dimension that should be plotted
        if plotStateDims == None or plotStateDims == []:
            if len(stateSpace.items()) != 2:
                warnings.warn("%s: Not two state space dimensions."
                              "Please specify plotStateDims explicitly. " %
                              self.__class__.__name__)
                return
            plotStateDims = [stateSpace.keys()[0], stateSpace.keys()[1]]
        elif len(plotStateDims) != 2:
            warnings.warn(
                "%s: StateActionValuesObservable logging only defined when "
                "2 plotStateDims  are explicitly specified." %
                self.__class__.__name__)
            return

        # Prepare plotting
        fig.subplots_adjust(left=0.05,
                            right=0.95,
                            bottom=0.05,
                            top=0.95,
                            wspace=0.1,
                            hspace=0.2)

        # Different plotting for discrete and continuous dimensions
        if stateSpace.hasContinuousDimensions():
            # Generate 2d state slice
            defaultDimValues = {}
            for dimensionName in stateSpace.keys():
                defaultDimValues[dimensionName] = 0.5
            stateSlice = generate2dStateSlice(plotStateDims,
                                              stateSpace,
                                              defaultDimValues,
                                              gridNodesPerDim=rasterPoints)

            rows = int(math.ceil(len(plotActions) / 2.0))
            data = dict()
            # Determine absolute min and max to align the subplots' colormaps;
            # initialized so that they will be exceeded by any actual value
            absmin = float('inf')
            absmax = -float('inf')
            # For all actions that should be plotted
            for plotNum, action in enumerate(plotActions):
                # Compute values that should be plotted, colormapping == {} in continuous case
                values, colorMapping = \
                        generate2dPlotArray(lambda state : function(state, action),
                                            stateSlice, True,
                                            shape=(rasterPoints, rasterPoints))

                # Check if there is something to plot
                if values.mask.all():
                    continue

                # some comparison for colormap alignment
                thismin = values.min()
                if thismin < absmin:
                    absmin = thismin
                thismax = values.max()
                if thismax > absmax:
                    absmax = thismax

                # save the data to plot later, when ranges are known
                data[(plotNum, action)] = values.T

            # plot the data
            for plotNum, action in data.keys():
                # add subplot
                subplot = fig.add_subplot(rows, 2, plotNum + 1)
                subplot.clear()

                # create pseudocolorplot in current subplot
                polyCollection = fig.gca().pcolor(
                    numpy.linspace(0.0, 1.0, rasterPoints),
                    numpy.linspace(0.0, 1.0, rasterPoints),
                    data[(plotNum, action)],
                    vmin=absmin,
                    vmax=absmax)

                # Add colorbar
                fig.colorbar(polyCollection)

                # Labeling etc.
                subplot.set_xlim(0, 1)
                subplot.set_ylim(0, 1)
                subplot.set_xlabel(plotStateDims[0])
                subplot.set_ylabel(plotStateDims[1])
                subplot.set_title(action)
        else:
            assert (len(stateSpace.items()) == 2), \
                    "Discrete state spaces can only be plotted if they have two dimensions."
            valuesX = stateSpace[plotStateDims[0]]["dimensionValues"]
            valuesY = stateSpace[plotStateDims[1]]["dimensionValues"]
            stateSlice = {}
            from mmlf.framework.state import State
            for i, valueX in enumerate(valuesX):
                for j, valueY in enumerate(valuesY):
                    # Create state object
                    stateSlice[(i, j)] = State([valueX, valueY], [
                        stateSpace[plotStateDims[0]],
                        stateSpace[plotStateDims[1]]
                    ])

            rows = int(math.ceil(len(plotActions) / 2.0))
            # For all actions that should be plotted
            for plotNum, action in enumerate(plotActions):
                # Clear old plot
                subplot = fig.add_subplot(rows, 2, plotNum + 1)
                subplot.clear()
                # Compute values that should be plotted
                values, colorMapping = \
                        generate2dPlotArray(lambda state : function(state, action),
                                            stateSlice, True,
                                            shape=(len(valuesX), len(valuesY)))

                # Check if there is something to plot
                if values.mask.all():
                    continue

                # Do the actual plotting
                polyCollection = \
                    fig.gca().pcolor(numpy.array(valuesX) - 0.5,
                                     numpy.array(valuesY) - 0.5, values.T)

                # Add colorbar
                fig.colorbar(polyCollection)

                # Labeling etc.
                subplot.set_xlim(min(valuesX), max(valuesX))
                subplot.set_ylim(min(valuesY), max(valuesY))
                subplot.set_xlabel(plotStateDims[0])
                subplot.set_ylabel(plotStateDims[1])
                subplot.set_title(action)
Example #26
    def plot(self,
             function,
             fig,
             stateSpace,
             actionSpace,
             plotStateDims=None,
             rasterPoints=100):
        """ Creates a graphical representation of a FunctionOverStateSpace.
        
        Creates a plot of *function* in the 2D subspace of the state space
        spanned by the two dimensions given in *plotStateDims*.
        """
        # Determine the indices of the dimension that should be plotted
        if plotStateDims == None or plotStateDims == []:
            if len(stateSpace.items()) != 2:
                warnings.warn("%s: Not two state space dimensions. "
                              "Please specify plotStateDims explicitly. " %
                              self.__class__.__name__)
                return
            plotStateDims = [stateSpace.keys()[0], stateSpace.keys()[1]]
        elif len(plotStateDims) != 2:
            warnings.warn(
                "%s: FunctionOverStateSpace logging only defined when "
                "2 plotStateDims  are explicitly specified." %
                self.__class__.__name__)
            return

        # Prepare plotting
        fig.subplots_adjust(left=0.05,
                            right=0.95,
                            bottom=0.05,
                            top=0.95,
                            wspace=0.1,
                            hspace=0.1)

        # Different plotting for discrete and continuous dimensions
        if stateSpace.hasContinuousDimensions():
            # Generate 2d state slice
            defaultDimValues = {}
            for dimensionName in stateSpace.keys():
                defaultDimValues[dimensionName] = 0.5
            stateSlice = generate2dStateSlice(plotStateDims,
                                              stateSpace,
                                              defaultDimValues,
                                              gridNodesPerDim=rasterPoints)

            # Compute values that should be plotted
            values, colorMapping = \
                        generate2dPlotArray(function, stateSlice,
                                            not self.discreteValues,
                                            shape=(rasterPoints, rasterPoints))

            # Check if there is something to plot
            if values.mask.all():
                return

            # Do the actual plotting
            polyCollection = \
                fig.gca().pcolor(numpy.linspace(0.0, 1.0, rasterPoints),
                                 numpy.linspace(0.0, 1.0, rasterPoints),
                                 values.T)

            # Polishing of figure
            fig.gca().set_xlim(0.0, 1.0)
            fig.gca().set_ylim(0.0, 1.0)
        else:
            assert (len(stateSpace.items()) == 2), \
                    "Discrete state spaces can only be plotted if they have two dimensions."
            valuesX = stateSpace[plotStateDims[0]]["dimensionValues"]
            valuesY = stateSpace[plotStateDims[1]]["dimensionValues"]
            stateSlice = {}
            from mmlf.framework.state import State
            for i, valueX in enumerate(valuesX):
                for j, valueY in enumerate(valuesY):
                    # Create state object
                    stateSlice[(i, j)] = State([valueX, valueY], [
                        stateSpace[plotStateDims[0]],
                        stateSpace[plotStateDims[1]]
                    ])

            # Compute values that should be plotted
            values, colorMapping = \
                        generate2dPlotArray(function, stateSlice,
                                            not self.discreteValues,
                                            shape=(len(valuesX), len(valuesY)))

            polyCollection = \
                fig.gca().pcolor(numpy.array(valuesX) - 0.5,
                                 numpy.array(valuesY) - 0.5,
                                 values.T)

            # Polishing of figure
            fig.gca().set_xlim(min(valuesX), max(valuesX))
            fig.gca().set_ylim(min(valuesY), max(valuesY))

        fig.gca().set_xlabel(plotStateDims[0])
        fig.gca().set_ylabel(plotStateDims[1])
        # Create a legend or a colorbar, respectively
        if not self.discreteValues:
            fig.colorbar(polyCollection)
        else:
            # Some dummy code that creates patches that are not shown but allow
            # for a colorbar
            from matplotlib.patches import Rectangle
            linearSegmentedColorbar = polyCollection.get_cmap()
            patches = []
            functionValues = []
            for functionValue, colorValue in colorMapping.items():
                if isinstance(functionValue, tuple):
                    functionValue = functionValue[0]  # deal with '(action,)'
                normValue = polyCollection.norm(colorValue)
                if isinstance(normValue, numpy.ndarray):
                    normValue = normValue[
                        0]  # happens when function is constant
                rgbaColor = linearSegmentedColorbar(normValue)

                p = Rectangle((0, 0), 1, 1, fc=rgbaColor)
                functionValues.append(functionValue)
                patches.append(p)
            fig.gca().legend(patches, functionValues)