def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
    """Update ``self._derivs`` and fill ``inerr`` from ``outbuf - inbuf``.

    ``outerr`` is accepted but never read. ``inerr`` is written in place.

    Two modes, selected by ``self.onesigma``:

    * one global sigma: ``self._derivs`` is incremented in place, and
      both ``inerr`` and ``self._derivs`` are normalised unless
      ``self.autoalpha`` is set;
    * one sigma per (output, state) pair: ``self.params`` is viewed as a
      ``len(outbuf) x len(self.state)`` matrix and the entries of
      ``self._derivs`` are overwritten one by one in row-major order.
    """
    if self.onesigma:
        # algorithm for one global sigma for all mu's
        expln_params = expln(self.params)
        sumxsquared = dot(self.state, self.state)
        self._derivs += (
            sum((outbuf - inbuf) ** 2 - expln_params ** 2 * sumxsquared)
            / expln_params * explnPrime(self.params)
        )
        inerr[:] = (outbuf - inbuf)

        # The normalisation is skipped when sumxsquared is zero so that
        # nothing is divided by zero.
        if not self.autoalpha and sumxsquared != 0:
            inerr /= expln_params ** 2 * sumxsquared
            self._derivs /= expln_params ** 2 * sumxsquared
    else:
        # Algorithm for separate sigma for each mu
        n_out = len(outbuf)
        n_state = len(self.state)
        expln_params = expln(self.params).reshape(n_out, n_state)
        explnPrime_params = explnPrime(self.params).reshape(n_out, n_state)

        idx = 0
        # `range` instead of `xrange`: the latter is a NameError on Python 3.
        for j in range(n_out):
            sigma_subst2 = dot(self.state ** 2, expln_params[j, :] ** 2)
            for i in range(n_state):
                # NOTE(review): sigma_subst2 is used as a divisor here
                # before any of the `!= 0` checks below.
                self._derivs[idx] = ((outbuf[j] - inbuf[j]) ** 2 - sigma_subst2) / sigma_subst2 * \
                    self.state[i] ** 2 * expln_params[j, i] * explnPrime_params[j, i]
                # NOTE(review): the derivative is rescaled when autoalpha
                # IS set, while inerr (and both quantities in the
                # one-sigma branch) are rescaled when it is NOT set.
                # Confirm this asymmetry is intended.
                if self.autoalpha and sigma_subst2 != 0:
                    self._derivs[idx] /= sigma_subst2
                idx += 1
            inerr[j] = (outbuf[j] - inbuf[j])
            if not self.autoalpha and sigma_subst2 != 0:
                inerr[j] /= sigma_subst2
Пример #2
0
    def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        """Update ``self._derivs`` and fill ``inerr`` from ``outbuf - inbuf``.

        ``outerr`` is accepted but never read. ``inerr`` is written in place.

        With ``self.onesigma`` set, ``self._derivs`` is incremented in place
        and, unless ``self.autoalpha`` is set, both ``inerr`` and
        ``self._derivs`` are normalised. Otherwise ``self.params`` is viewed
        as a ``len(outbuf) x len(self.state)`` matrix and the entries of
        ``self._derivs`` are overwritten one by one in row-major order.
        """
        if self.onesigma:
            # algorithm for one global sigma for all mu's
            expln_params = expln(self.params)
            sumxsquared = dot(self.state, self.state)
            self._derivs += (
                sum((outbuf - inbuf) ** 2 - expln_params ** 2 * sumxsquared) / expln_params * explnPrime(self.params)
            )
            inerr[:] = outbuf - inbuf

            # Normalisation is skipped when sumxsquared is zero so that
            # nothing is divided by zero.
            if not self.autoalpha and sumxsquared != 0:
                inerr /= expln_params ** 2 * sumxsquared
                self._derivs /= expln_params ** 2 * sumxsquared
        else:
            # Algorithm for separate sigma for each mu
            n_out = len(outbuf)
            n_state = len(self.state)
            expln_params = expln(self.params).reshape(n_out, n_state)
            explnPrime_params = explnPrime(self.params).reshape(n_out, n_state)

            idx = 0
            # `range` instead of `xrange`: the latter is a NameError on Python 3.
            for j in range(n_out):
                sigma_subst2 = dot(self.state ** 2, expln_params[j, :] ** 2)
                for i in range(n_state):
                    # NOTE(review): sigma_subst2 is a divisor here before any
                    # of the `!= 0` checks below.
                    self._derivs[idx] = (
                        ((outbuf[j] - inbuf[j]) ** 2 - sigma_subst2)
                        / sigma_subst2
                        * self.state[i] ** 2
                        * expln_params[j, i]
                        * explnPrime_params[j, i]
                    )
                    # NOTE(review): the derivative is rescaled when autoalpha
                    # IS set, whereas inerr is rescaled when it is NOT set.
                    # Confirm this asymmetry is intended.
                    if self.autoalpha and sigma_subst2 != 0:
                        self._derivs[idx] /= sigma_subst2
                    idx += 1
                inerr[j] = outbuf[j] - inbuf[j]
                if not self.autoalpha and sigma_subst2 != 0:
                    inerr[j] /= sigma_subst2
Пример #3
0
    def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        """Accumulate into ``self._derivs`` and write ``outbuf - inbuf`` to ``inerr``.

        ``inerr`` is overwritten in place. Unless ``self.autoalpha`` is set,
        both ``inerr`` and ``self._derivs`` are afterwards divided in place by
        the square of ``expln(self.params)``. ``outerr`` is not used.
        """
        sigma = expln(self.params)
        deviation = outbuf - inbuf

        self._derivs += (deviation ** 2 - sigma ** 2) / sigma * explnPrime(self.params)
        inerr[:] = deviation

        if self.autoalpha:
            # No normalisation is applied when autoalpha is enabled.
            return

        variance = sigma ** 2
        inerr /= variance
        self._derivs /= variance
Пример #4
0
 def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
     """Add the sigma terms to ``self._derivs`` and set ``inerr`` in place.

     ``inerr`` becomes ``outbuf - inbuf``. When ``self.autoalpha`` is not
     set, ``inerr`` and ``self._derivs`` are both divided in place by
     ``expln(self.params) ** 2``. ``outerr`` is ignored.
     """
     scale = expln(self.params)
     residual = outbuf - inbuf
     numerator = residual ** 2 - scale ** 2

     self._derivs += numerator / scale * explnPrime(self.params)
     inerr[:] = residual

     normalise = not self.autoalpha
     if normalise:
         squared_scale = scale ** 2
         inerr /= squared_scale
         self._derivs /= squared_scale
Пример #5
0
 def __init__(self, statedim, actiondim, sigma=-2.):
     """Set up dimensions, sigma values and the random exploration matrix.

     ``Explorer.__init__`` is called with ``actiondim`` for both of its
     arguments and ``ParameterContainer.__init__`` with ``actiondim`` and
     ``stdParams=0``. ``self.sigma`` holds ``actiondim`` copies of
     ``sigma``; ``self.explmatrix`` is a ``(statedim, actiondim)`` draw
     from ``random.normal`` with mean 0 and spread ``expln(self.sigma)``.
     """
     Explorer.__init__(self, actiondim, actiondim)
     self.statedim = statedim
     self.actiondim = actiondim

     # Parameter container first, then the per-action sigma list.
     ParameterContainer.__init__(self, actiondim, stdParams=0)
     self.sigma = [sigma] * actiondim

     # Linear exploration function, represented as a random matrix.
     matrix_shape = (statedim, actiondim)
     spread = expln(self.sigma)
     self.explmatrix = random.normal(0., spread, matrix_shape)

     # No state has been stored yet.
     self.state = None
Пример #6
0
    def __init__(self, statedim, actiondim, sigma=-2.):
        """Initialise dimensions, sigma list and exploration matrix.

        Both base initialisers are invoked explicitly: ``Explorer.__init__``
        with ``actiondim`` passed twice, and ``ParameterContainer.__init__``
        with ``actiondim`` and ``stdParams=0``. ``self.explmatrix`` is drawn
        with ``random.normal`` (mean 0, spread ``expln(self.sigma)``) in
        shape ``(statedim, actiondim)``; ``self.state`` starts as ``None``.
        """
        Explorer.__init__(self, actiondim, actiondim)
        self.statedim = statedim
        self.actiondim = actiondim

        # One sigma entry per action dimension, all starting at `sigma`.
        ParameterContainer.__init__(self, actiondim, stdParams=0)
        self.sigma = [sigma] * actiondim

        # Random matrix used as a linear exploration function.
        noise_scale = expln(self.sigma)
        self.explmatrix = random.normal(0., noise_scale, (statedim, actiondim))

        # Last state seen; nothing stored until one is provided.
        self.state = None
Пример #7
0
    def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        """Overwrite ``self._derivs`` entry by entry and set ``inerr``.

        ``self.params`` is viewed as a ``len(outbuf) x len(self.state)``
        matrix; ``self._derivs`` is filled in the matching row-major order.
        ``inerr[j]`` receives ``outbuf[j] - inbuf[j]``. ``outerr`` is unused,
        and no autoalpha-dependent rescaling is applied in this variant.
        """
        n_out = len(outbuf)
        n_state = len(self.state)
        sigmas = expln(self.params).reshape(n_out, n_state)
        sigma_primes = explnPrime(self.params).reshape(n_out, n_state)

        for row in range(n_out):
            variance = dot(self.state**2, sigmas[row, :]**2)
            # Flat position of this row's first entry in self._derivs.
            base = row * n_state
            for col in range(n_state):
                self._derivs[base + col] = (
                    ((outbuf[row] - inbuf[row]) ** 2 - variance) / variance
                    * self.state[col] ** 2 * sigmas[row, col] * sigma_primes[row, col]
                )
            inerr[row] = outbuf[row] - inbuf[row]
Пример #8
0
    def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
        """Overwrite ``self._derivs`` entry by entry and set ``inerr``.

        ``self.params`` is viewed as a ``len(outbuf) x len(self.state)``
        matrix and ``self._derivs`` is filled in row-major order. ``inerr[j]``
        receives ``outbuf[j] - inbuf[j]``. ``outerr`` is accepted but unused.
        """
        n_out = len(outbuf)
        n_state = len(self.state)
        expln_params = expln(self.params).reshape(n_out, n_state)
        explnPrime_params = explnPrime(self.params).reshape(n_out, n_state)

        idx = 0
        # `range` instead of `xrange`: the latter is a NameError on Python 3.
        for j in range(n_out):
            sigma_subst2 = dot(self.state ** 2, expln_params[j, :] ** 2)
            for i in range(n_state):
                # NOTE(review): sigma_subst2 is used as a divisor without a
                # zero check.
                self._derivs[idx] = ((outbuf[j] - inbuf[j]) ** 2 - sigma_subst2) / sigma_subst2 * \
                    self.state[i] ** 2 * expln_params[j, i] * explnPrime_params[j, i]
                # NOTE: no autoalpha-dependent rescaling of the derivative is
                # performed in this variant.
                idx += 1
            inerr[j] = (outbuf[j] - inbuf[j])
 def drawRandomWeights(self):
     """Hand the module a freshly drawn parameter set.

     The values come from ``random.normal`` with mean 0, spread
     ``expln(self.params)`` and size ``self.module.paramdim``.
     """
     spread = expln(self.params)
     fresh_params = random.normal(0, spread, self.module.paramdim)
     self.module._setParameters(fresh_params)
Пример #10
0
 def _forwardImplementation(self, inbuf, outbuf):
     """Write ``inbuf``, or a noisy version of it, into ``outbuf`` in place.

     When ``self.enabled`` is false the input is copied through unchanged;
     otherwise ``outbuf`` is filled from ``random.normal`` centred on
     ``inbuf`` with spread ``expln(self.params)``.
     """
     if self.enabled:
         outbuf[:] = random.normal(inbuf, expln(self.params))
         return
     # Exploration switched off: plain pass-through.
     outbuf[:] = inbuf
Пример #11
0
 def newEpisode(self):
     """ Draw a new random exploration matrix for the coming episode.

     The matrix keeps its current shape; entries come from
     ``random.normal`` with mean 0 and spread ``expln(self.sigma)``.
     """
     current_shape = self.explmatrix.shape
     self.explmatrix = random.normal(0., expln(self.sigma), current_shape)
Пример #12
0
 def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
     """Accumulate into ``self._derivs`` and copy ``outbuf - inbuf`` to ``inerr``.

     The increment is built from ``expln(self.sigma)`` and
     ``explnPrime(self.sigma)``. ``inerr`` is written in place and
     ``outerr`` is not read.
     """
     spread = expln(self.sigma)
     deviation = outbuf - inbuf
     self._derivs += (deviation ** 2 - spread ** 2) / spread * explnPrime(self.sigma)
     inerr[:] = deviation
Пример #13
0
 def _forwardImplementation(self, inbuf, outbuf):
     """Fill ``outbuf`` in place with ``random.normal`` samples.

     The samples are centred on ``inbuf`` with spread ``expln(self.sigma)``.
     """
     noise_scale = expln(self.sigma)
     outbuf[:] = random.normal(inbuf, noise_scale)
Пример #14
0
 def newEpisode(self):
     """ Replace the exploration matrix with fresh random values for one episode.

     Shape is preserved; values are drawn by ``random.normal`` with mean 0
     and spread ``expln(self.sigma)``.
     """
     spread = expln(self.sigma)
     self.explmatrix = random.normal(0., spread, self.explmatrix.shape)
Пример #15
0
 def _forwardImplementation(self, inbuf, outbuf):
     """Copy ``inbuf`` to ``outbuf``, adding noise when ``self.enabled``.

     With exploration enabled the output is drawn from ``random.normal``
     centred on ``inbuf`` with spread ``expln(self.params)``; otherwise the
     input is passed through as is. ``outbuf`` is written in place.
     """
     exploring = bool(self.enabled)
     if not exploring:
         outbuf[:] = inbuf
         return
     noise_scale = expln(self.params)
     outbuf[:] = random.normal(inbuf, noise_scale)
Пример #16
0
 def drawRandomWeights(self):
     """Set the module's parameters to a new random draw.

     ``random.normal`` is called with mean 0, spread
     ``expln(self.params)`` and size ``self.module.paramdim``; the result
     is passed to ``self.module._setParameters``.
     """
     module = self.module
     weights = random.normal(0, expln(self.params), module.paramdim)
     module._setParameters(weights)
Пример #17
0
 def _backwardImplementation(self, outerr, inerr, outbuf, inbuf):
     """Increment ``self._derivs`` and store ``outbuf - inbuf`` in ``inerr``.

     Uses ``expln(self.sigma)`` and ``explnPrime(self.sigma)`` for the
     increment. ``inerr`` is overwritten in place; ``outerr`` is unused.
     """
     sigma_vals = expln(self.sigma)
     residual = outbuf - inbuf
     numerator = residual ** 2 - sigma_vals ** 2
     self._derivs += numerator / sigma_vals * explnPrime(self.sigma)
     inerr[:] = residual
Пример #18
0
 def _forwardImplementation(self, inbuf, outbuf):
     """Write normally distributed samples around ``inbuf`` into ``outbuf``.

     The spread passed to ``random.normal`` is ``expln(self.sigma)``;
     ``outbuf`` is modified in place.
     """
     spread = expln(self.sigma)
     samples = random.normal(inbuf, spread)
     outbuf[:] = samples