Example #1
#                print 'added feature'
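                # a new basis function was accepted: allocate its weight and update the feature count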
                self.addNewWeight()
                addedFeature        = True
                self.features_num   += 1
                self.logger.log('Added feature. \t %d total feats' % self.features_num)
            else: 
                break
        return addedFeature
    def featureType(self):
        return float

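# NOTE: the imports this demo relies on sit above the excerpt and are not shown.
# Based on the module layout visible in Example #2, it presumably needs something
# like the following (assumed, not part of the original snippet):
#   from numpy import arange
#   from Domains import GridWorld
#   from Representations import BEBF
#   from Tools import Logger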
if __name__ == '__main__':
    STDOUT_FILE         = 'out.txt'
    JOB_ID              = 1
    OUT_PATH            = 'Results/Temp'
#    logger              = Logger('%s/%d-%s'%(OUT_PATH,JOB_ID,STDOUT_FILE))
    logger              = Logger()
    discovery_threshold = 1
    domain      = GridWorld()
    rep         = BEBF(domain, logger, debug=1, batchThreshold=1e-5)
    rep.theta   = arange(rep.features_num*domain.actions_num)*10
    print 'initial features'
    print rep.features_num,'---',rep.features
    s           = domain.s0()
    a           = domain.possibleActions(s)
    a = a[0]
    r,ns,terminal   = domain.step(s, a)
    print 'step 2 r,ns',r,ns
    
    
    
Example #2
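        # normalize v into a probability distribution over actions and guard against NaNs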
        r = v / v.sum()
        assert not np.any(np.isnan(r))
        return r


if __name__ == "__main__":

    # Finite differences check of dlogpi
    from Domains import GridWorld
    from Representations import Tabular
    from scipy.optimize import check_grad, approx_fprime
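    # NOTE: `np` (numpy), `Logger`, and the `GibbsPolicy` class are assumed to be
    # imported/defined earlier in this file; that part of the excerpt is not shown.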

    MAZE = "./Domains/GridWorldMaps/4x5.txt"
    NOISE = 0.3
    logger = Logger()
    domain = GridWorld(MAZE, noise=NOISE, logger=logger)
    representation = Tabular(logger=logger, domain=domain, discretization=20)
    policy = GibbsPolicy(representation=representation, logger=logger)

    def f(theta, s, a):
        policy.representation.theta = theta
        return np.log(policy.prob(s, a))

    def df(theta, s, a):
        policy.representation.theta = theta
        return policy.dlogpi(s, a)

    def df_approx(theta, s, a):
        return approx_fprime(theta, f, 1e-10, s, a)

    thetas = np.random.rand(10, len(representation.theta))
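    # The original script is truncated here. A minimal sketch of how the
    # finite-difference check might continue (this loop is an assumption, not
    # the original code): for each random theta, sample a state/action pair
    # and compare dlogpi against the numerical gradient via check_grad.
    for theta in thetas:
        s = domain.s0()
        a = domain.possibleActions(s)[0]
        error = check_grad(f, df, theta, s, a)
        print 'check_grad error:', error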