Exemplo n.º 1
    def samplePredecessorState(self, state):
        """ Return a state drawn from *state*'s predecessor distribution

        Every state *predState* in ``self.states`` is weighted by
        ``invStateTransitions[(state, predState)] / stateCounter[predState]``
        (0.0 if the counter of *predState* is zero). A uniform random number
        from [0, 1) is drawn and the first state whose cumulative weight
        reaches that number is returned.

        Raises ModelNotInitialized if the counter of *state* is zero.
        Returns None implicitly if the cumulative weight never reaches the
        drawn number.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        def computeProbabilityFct(predState):
            # A zero counter gets weight 0.0; this also avoids dividing by zero
            if self.stateCounter[predState] == 0:
                return 0.0
            else:
                return float(self.invStateTransitions[(state, predState)]) \
                                    / self.stateCounter[predState]

        probabilityMassFunction = [(predState,
                                    computeProbabilityFct(predState))
                                   for predState in self.states]

        # Inverse-CDF sampling over the (state, weight) pairs
        randValue = random.random()
        accumulator = 0.0
        for predState, probabilityMass in probabilityMassFunction:
            accumulator += probabilityMass
            if accumulator >= randValue:
                return predState
Exemplo n.º 2
 def sampleState(self):
     """ Return a state drawn randomly

     Asks the example set for its state density, draws one sample from it
     via ``resample(1)`` and wraps the first row of the transposed sample
     in a State.

     Raises ModelNotInitialized if the example set provides no state
     density (``getStateDensity()`` returns None).
     """
     stateDensity = self.exampleSet.getStateDensity()
     if stateDensity is None:
         raise ModelNotInitialized()

     # TODO: Does it make sense to sample based on the data set?
     return State(stateDensity.resample(1).T[0])
Exemplo n.º 3
 def sampleSuccessorState(self, state):
     """ Return a state drawn from the state's successor distribution

     The successor is built from *state* plus the value predicted by
     ``self.succStateModel`` for *state*.
     """
     # NOTE(review): the body of the following ``if`` is missing from this
     # excerpt (presumably a call that retrains the model); restore it from
     # the original source before use.
     if self._retrainingRequired():
     if self.succStateModel != None:
         # NOTE(review): this call is truncated; the same expression in
         # getSuccessorDistribution passes ``state.dimensions`` as second
         # argument -- TODO confirm.
         return State(state + self.succStateModel.predict(state),
         # NOTE(review): an ``else:`` appears to have been lost before this
         # raise; as written it cannot be reached -- TODO confirm.
         raise ModelNotInitialized()
Exemplo n.º 4
    def getExpectedReward(self, state):
        """ Returns the expected reward for the given state *state*

        The reward is looked up in ``self.rewardModel`` under the nearest
        neighbor of *state* in the example set and converted to float.
        """
        # NOTE(review): the body of the following ``if`` is missing from this
        # excerpt (presumably a call that retrains the model); restore it
        # from the original source before use.
        if self._retrainingRequired():

        if self.rewardModel != None and self.exampleSet.states != None:
            nearestNeighbor = self.exampleSet.getNearestNeighbor(state)
            return float(self.rewardModel[nearestNeighbor])
            # NOTE(review): an ``else:`` appears to have been lost before this
            # raise; as written it cannot be reached -- TODO confirm.
            raise ModelNotInitialized()
Exemplo n.º 5
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given state. 

        Returns an iterator that yields pairs of states and
        their probabilities of being the successor of the given state. 
        The visible code yields a single pair: *state* plus the value
        predicted by ``self.succStateModel``, with probability 1.0.
        """
        # NOTE(review): the body of the following ``if`` is missing from this
        # excerpt (presumably a call that retrains the model); restore it
        # from the original source before use.
        if self._retrainingRequired():

        if self.succStateModel != None:
            # This is a deterministic model!
            yield (State(state + self.succStateModel.predict(state),
                         state.dimensions), 1.0)
            # NOTE(review): an ``else:`` appears to have been lost before this
            # raise; as written it is raised right after the yield -- TODO
            # confirm.
            raise ModelNotInitialized()
Exemplo n.º 6
    def getExplorationValue(self, state):
        """ Return the exploratory value of the given state *state*

        The exploratory value of a state under this model is based on the
        euclidean distance from the state to its nearest neighbor in the
        example set. Note that the negated distance is returned.
        """
        # NOTE(review): the body of the following ``if`` is missing from this
        # excerpt (presumably a call that retrains the model); restore it
        # from the original source before use.
        if self._retrainingRequired():

        if self.exampleSet.states != None:
            nearestNeighbor = self.exampleSet.getNearestNeighbor(state)
            dist = numpy.linalg.norm(nearestNeighbor - state)
            return -dist
            # NOTE(review): an ``else:`` appears to have been lost before this
            # raise; as written it cannot be reached -- TODO confirm.
            raise ModelNotInitialized()
Exemplo n.º 7
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given *state*.

        Returns an iterator that yields pairs of states and
        their probabilities of being the successor of the given *state*.

        Up to ``self.k`` nearest stored states are queried from a kd-tree
        over ``self.states`` (rebuilt when ``self.rebuildSucc`` is set). For
        each neighbor, the offset between its stored successor and the
        neighbor itself is added to *state*. The pair's probability is
        ``gaussian(distance, self.b_Sa)`` divided by the sum of
        ``exp(-distance / self.b_Sa**2)`` over all queried neighbors.

        Raises ModelNotInitialized if ``self.states`` is None. Since this is
        a generator, the exception is raised on the first iteration.
        """
        import warnings

        # Identity check: ``== None`` on an array compares element-wise
        if self.states is None:
            raise ModelNotInitialized()

        k = min(self.states.shape[0], self.k)

        if self.rebuildSucc:
            self.succKDTree = ann.kdtree(self.states)
            self.rebuildSucc = False

        indices, distances = self.succKDTree.knn(state, k)

        denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa**2)))

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and probability 1.
        if denominator == 0 or numpy.isnan(denominator):
            warnings.warn(
                "Too large distances, returning only closest example")
            indices[0] = [indices[0][0]]
            distances[0] = [0.0]
            denominator = numpy.exp(0.0 / (self.b_Sa**2))

        for index, distance in zip(indices[0], distances[0]):
            # NOTE(review): the first argument was missing in the excerpt;
            # ``self.states[index]`` is used because the kd-tree is built
            # over ``self.states`` -- TODO confirm against the original.
            neighbor = State(self.states[index],
                             state.dimensions)  # TODO: not use state.dimensions
            succState, reward = self.successorSamples[neighbor]

            delta = succState - neighbor
            predictedSuccState = State(state + delta, state.dimensions)

            # Evaluate the weight once and reuse it for check and result
            probability = gaussian(distance, self.b_Sa) / denominator
            if not 0 <= probability <= 1:
                warnings.warn("Invalid distances in KNN Model!")
                print(distances)

            yield predictedSuccState, probability
Exemplo n.º 8
    def getNearestNeighbors(self, state, k, b):
        """ Determines *k* most similar states to the given *state*

        Determines *k* most similar states to the given *state*. Returns an
        iterator over (weight, neighbor), where weight is the gaussian weighted
        influence of the neighbor onto *state*. The weight is computed via
        ``exp(-distance / b**2)`` with the distance returned by the kd-tree
        query, divided by the sum of these values over all returned neighbors.
        Note that the weights sum to 1.

        *k* is capped at ``self.states.shape[1]``. If the model has no
        ``kdTree`` attribute, only *k* == 1 is supported: the closest sample
        is found by exhaustive search and yielded with weight 1.0.

        Raises ModelNotInitialized if ``self.states`` is None. Since this is
        a generator, the exception is raised on the first iteration.
        """
        if self.states is None:
            raise ModelNotInitialized("No state samples available")

        k = min(k, self.states.shape[1])

        if hasattr(self,
                   "kdTree"):  # if we can use approximate nearest neighbor
            indices, distances = self.kdTree.knn(state, k=k)

            # Compute weights based on distance
            weights = numpy.exp(-distances[0] / (b**2))
            denominator = numpy.sum(weights)

            # If the distances become too large, then all values can become zero
            # In this situation, we simply return the closest state and probability 1.
            if denominator == 0:
                import warnings
                warnings.warn(
                    "Too large distances, returning only closest example")
                indices[0] = [indices[0][0]]
                weights[0] = 1.0
            else:
                # Normalize weights
                weights = weights / denominator

            for index, weight in zip(indices[0], weights):
                yield weight, State(self.states.T[index], state.dimensions)
        else:
            # Exhaustive search only finds the single closest sample
            assert k == 1
            minDist = numpy.inf
            closestSample = None
            for index in range(self.states.shape[1]):
                sampleState = self.states.T[index]
                dist = numpy.linalg.norm(state - sampleState)
                if dist < minDist:
                    minDist = dist
                    closestSample = sampleState
            yield 1.0, State(closestSample, state.dimensions)
Exemplo n.º 9
    def samplePredecessorState(self, state):
        """ Return a state drawn from *state*'s predecessor distribution

        Returns a possible predecessor state of *state* drawn from the
        predecessor state distribution according to its probability mass
        function: a uniform random number from [0, 1] is drawn and the first
        state yielded by ``getPredecessorDistribution`` whose cumulative
        probability reaches that number is returned.

        Raises ModelNotInitialized if ``self.succStates`` is None. Fails with
        an AssertionError if the cumulative probability never reaches the
        drawn number.
        """
        # Identity check: ``== None`` on an array compares element-wise
        if self.succStates is None:
            raise ModelNotInitialized()

        predDistr = self.getPredecessorDistribution(state)
        randVal = random.uniform(0, 1)
        cumProb = 0.0
        for predState, predProb in predDistr:
            cumProb += predProb
            if cumProb >= randVal:
                return predState

        assert False, "No predecessor state has been found!"
Exemplo n.º 10
    def drawTransitions(self, samples):
        """ Returns a random iterator over the transitions

        Returns a random iterator over the transitions that yields *samples*
        number of transitions from the dataset. If more samples are requested
        than contained in the data set, then data is reused.

        Each transition is the triple of column *i* of ``self.states``,
        ``self.succStates`` and ``self.rewards``; columns are visited in a
        fresh random permutation on every pass over the data set. Nothing is
        yielded if *samples* is not positive or the data set has no columns.

        Raises ModelNotInitialized if ``self.states`` is None. Since this is
        a generator, the exception is raised on the first iteration.
        """
        if self.states is None:
            raise ModelNotInitialized()

        numTransitions = self.states.shape[1]
        # Without this guard an empty data set would loop forever and a
        # non-positive request would still yield one transition
        if samples <= 0 or numTransitions == 0:
            return

        counter = 0
        while True:
            for i in numpy.random.permutation(numTransitions):
                yield (self.states[:, i], self.succStates[:, i],
                       self.rewards[:, i])
                counter += 1
                if counter >= samples:
                    return
Exemplo n.º 11
    def getSuccessorDistribution(self, state):
        """ Return the successor distribution for the given *state*.

        Returns an iterator that yields pairs of states and
        their probabilities of being the successor of the given *state*.
        The probability of *succState* is
        ``stateTransitions[(state, succState)] / stateCounter[state]``; only
        successors with a probability greater than zero are yielded.

        Raises ModelNotInitialized if the counter of *state* is zero. Since
        this is a generator, the exception is raised on the first iteration.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        def computeProbabilityFct(succState):
            # float() forces true division for integer counts
            return float(self.stateTransitions[(state, succState)]) \
                                    / self.stateCounter[state]

        probabilityMassFunction = [(succState,
                                    computeProbabilityFct(succState))
                                   for succState in self.states]

        for succState, probabilityMass in probabilityMassFunction:
            if probabilityMass > 0.0:
                yield succState, probabilityMass
Exemplo n.º 12
    def getPredecessorDistribution(self, state):
        """ Return the predecessor distribution for the given *state*.

        Returns an iterator that yields pairs of states and their weights
        of being the predecessor of the given *state*.

        Up to ``self.k`` nearest stored successor states are queried from a
        kd-tree over ``self.succStates`` (rebuilt when ``self.rebuildPred``
        is set). For each neighbor, the offset between its stored predecessor
        and the neighbor itself is added to *state*. The pair's weight is
        ``gaussian(distance, self.b_Sa)`` divided by the sum of
        ``exp(-distance / self.b_Sa**2)`` over all queried neighbors.

        Raises ModelNotInitialized if ``self.succStates`` is None. Since this
        is a generator, the exception is raised on the first iteration.
        """
        # Identity check: ``== None`` on an array compares element-wise
        if self.succStates is None:
            raise ModelNotInitialized()

        k = min(self.states.shape[0], self.k)

        if self.rebuildPred:
            self.predKDTree = ann.kdtree(self.succStates)
            self.rebuildPred = False

        indices, distances = self.predKDTree.knn(state, k)

        denominator = numpy.sum(numpy.exp(-distances[0] / (self.b_Sa**2)))

        # If the distances become too large, then all values can become zero
        # In this situation, we simply return the closest state and probability 1.
        if denominator == 0:
            import warnings
            warnings.warn("Too large distances, returing only closest example")
            indices[0] = [indices[0][0]]
            distances[0] = [0.0]
            denominator = numpy.exp(0.0 / (self.b_Sa**2))

        for index, distance in zip(indices[0], distances[0]):
            # NOTE(review): the first argument was missing in the excerpt;
            # ``self.succStates[index]`` is used because the kd-tree is built
            # over ``self.succStates`` -- TODO confirm against the original.
            neighbor = State(self.succStates[index],
                             state.dimensions)  # TODO: not use state.dimensions
            predState, reward = self.predecessorSamples[neighbor]

            delta = predState - neighbor
            predictedPredState = State(state + delta, state.dimensions)

            yield predictedPredState, gaussian(distance,
                                               self.b_Sa) / denominator
Exemplo n.º 13
    def sampleSuccessorState(self, state):
        """ Return a state drawn from *state*'s successor distribution

        Returns a possible successor state of *state* drawn from the successor
        state distribution according to its probability mass function. The
        probability of *succState* is
        ``stateTransitions[(state, succState)] / stateCounter[state]``.

        Raises ModelNotInitialized if the counter of *state* is zero.
        Returns None implicitly if the cumulative probability never reaches
        the drawn random number.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        def computeProbabilityFct(succState):
            # float() forces true division for integer counts
            return float(self.stateTransitions[(state, succState)]) \
                                    / self.stateCounter[state]

        probabilityMassFunction = [(succState,
                                    computeProbabilityFct(succState))
                                   for succState in self.states]

        # Inverse-CDF sampling over the (state, probability) pairs
        randValue = random.random()
        accumulator = 0.0
        for succState, probabilityMass in probabilityMassFunction:
            accumulator += probabilityMass
            if accumulator >= randValue:
                return succState
Exemplo n.º 14
    def getPredecessorDistribution(self, state):
        """ Return the predecessor distribution for the given *state*.

        Returns an iterator that yields pairs of states and their weights of
        being the predecessor of the given *state*. The weight of *predState*
        is ``invStateTransitions[(state, predState)] / stateCounter[predState]``
        (0.0 if the counter of *predState* is zero); only states with a
        weight greater than zero are yielded.

        Raises ModelNotInitialized if the counter of *state* is zero. Since
        this is a generator, the exception is raised on the first iteration.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        def computeProbabilityFct(predState):
            # A zero counter gets weight 0.0; this also avoids dividing by zero
            if self.stateCounter[predState] == 0:
                return 0.0
            else:
                return float(self.invStateTransitions[(state, predState)]) \
                                    / self.stateCounter[predState]

        probabilityMassFunction = [(predState,
                                    computeProbabilityFct(predState))
                                   for predState in self.states]

        for predState, probabilityMass in probabilityMassFunction:
            if probabilityMass > 0.0:
                yield predState, probabilityMass
Exemplo n.º 15
    def sampleState(self):
        """ Return a known state randomly sampled with uniform distribution

        The known states are the keys of ``self.stateCounter``.

        Raises ModelNotInitialized if ``self.stateCounter`` has no keys.
        """
        # Materialize the keys: random.choice needs an indexable sequence,
        # which a Python 3 key view is not
        knownStates = list(self.stateCounter.keys())
        if not knownStates:
            raise ModelNotInitialized()

        return random.choice(knownStates)
Exemplo n.º 16
    def getExpectedReward(self, state):
        """ Returns the expected reward for the given state

        The expected reward is the reward accumulated for *state* divided by
        the counter of *state*.

        Raises ModelNotInitialized if the counter of *state* is zero.
        """
        if self.stateCounter[state] == 0:
            raise ModelNotInitialized()

        # float() forces true division for integer values, matching the
        # transition-probability computations of this model
        return float(self.accumulatedReward[state]) / self.stateCounter[state]