Example #1
def __init__(self):
    self.env = MountainCar()
    self.noiseRange = 1.0  # range of the exploration noise
    self.om = 0
    self.alpha = 0.6
    self.beta = 0.4
    # Bookkeeping: current step, total step, reward and episode counters.
    self.t = 0
    self.totStep = 0
    self.r = 0
    self.ep = 0
    self.perfs = result_log(algo="DDPG", l1=20, l2=10)  # performance logger
    self.actif = True
Example #2
def __init__(self, logger=None):
    self.env = MountainCar()
    self.noiseRange = 1.0
    self.noiseMax = 1.0
    self.om = 0
    self.alpha = 0.6
    self.beta = 0.4
    self.t = 0
    self.totStep = 0
    self.r = 0
    self.ep = 0
    # Use the injected logger when provided; otherwise create a default one.
    if logger is None:
        self.perfs = result_log(algo="DDPG", l1=20, l2=10)
    else:
        self.perfs = logger
    self.actif = True
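
A possible usage sketch of the logger-injection pattern above; the enclosing class name (`Experiment` here) is an assumption, since the snippets show only the `__init__` body:

from DDPG.logger.result import result_log

shared_log = result_log(algo="DDPG", l1=20, l2=10)
exp = Experiment(logger=shared_log)  # hypothetical class name; reuses one logger across runs
exp_default = Experiment()           # falls back to a fresh result_log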
Example #3
File: cma_mc.py Project: MOCR/DDPG
def term_callback(c):
    # Iteration hook for CMA-ES: evaluate the current distribution mean
    # on one noise-free episode and log the return.
    if CMA_obj.totStep != term_callback.lastCall:
        term_callback.lastCall = CMA_obj.totStep
        CMA_obj.act.load_parameters(c.mean)
        CMA_obj.env.reset(noise=False)
        ret = 0
        while not CMA_obj.env.isFinished():
            actn, r = CMA_obj.env.act(CMA_obj.act.action_batch(CMA_obj.env.state()))
            ret += r[0]
        term_callback.plot_data.addData(CMA_obj.totStep, CMA_obj.env.t, ret)
    return False  # never ask CMA-ES to stop; maxiter handles termination

CMA_obj.env = MountainCarEnv()
CMA_obj.act = simple_actor_network(2, 1, l1_size=20, l2_size=10)
CMA_obj.totStep = 0

term_callback.plot_data = result_log("CMA-ES", 20, 10)
term_callback.lastCall = -1
print("Going for CMA-ES")
op = cma.CMAOptions()
op['termination_callback'] = term_callback  # log progress each iteration
op['maxiter'] = 200
x = CMA_obj.act.linear_parameters()  # initial flat parameter vector
fx = cma.fmin(CMA_obj, x, 0.5, options=op)  # CMA_obj is the objective callable

term_callback.plot_data.save()

def draw_politic():
    plt.close()
    act = CMA_obj.act
    img = np.zeros((200, 200))
    pos = -1.
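
For reference, `termination_callback` is a genuine option of the `cma` package: the callback receives the CMAEvolutionStrategy instance after each iteration, and returning False keeps the run going, as in the example above. A minimal self-contained sketch of the same logging pattern, with a toy sphere objective standing in for the episode-return evaluation:

import cma
import numpy as np

def sphere(x):
    # Toy objective in place of the actor's episode return.
    return float(np.dot(x, x))

def log_callback(es):
    # es is the CMAEvolutionStrategy; es.mean plays the role of c.mean above.
    log_callback.history.append((es.countiter, sphere(es.mean)))
    return False  # never request early termination

log_callback.history = []

opts = cma.CMAOptions()
opts['termination_callback'] = log_callback
opts['maxiter'] = 50
res = cma.fmin(sphere, np.zeros(5), 0.5, options=opts)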
Example #4
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 21 13:31:15 2016

@author: arnaud
"""

from DDPG.core.DDPG_core import DDPG
import numpy as np

import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.logger.result import result_log

l1 = 20
l2 = 10
rate = 0.001

env = mc.MountainCarEnv(result_log("DDPG", l1, l2))
a_c = DDPG(env, actor=simple_actor_network(2, 1, l1_size=l1, l2_size=l2, learning_rate=rate))

def voidFunc():
    pass

env.extern_draw = voidFunc

def doEp(M, T=float("inf")):
    # Run M training episodes, each capped at T steps, then save the log.
    a_c.M_episodes(M, T)
    env.perfs.save()

doEp(4000)
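
Going by the signature above, `T` defaults to an unbounded horizon, so `doEp(4000)` runs 4000 uncapped episodes. A sketch of a capped run, with illustrative values:

doEp(100, T=500)  # 100 episodes, each cut off after 500 steps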
Example #5
File: test_mc.py Project: MOCR/DDPG
"""

from DDPG.core.DDPG_core import DDPG
import numpy as np

import DDPG.environement.instance.mountainCarEnv as mc
from DDPG.core.networks.simple_actor_network import simple_actor_network
from DDPG.core.networks.simple_critic_network import simple_critic_network

import matplotlib.pyplot as plt
from DDPG.logger.result import result_log

l1 = 20
l2 = 10

logger = result_log("DDPG", l1, l2, "simple_"+str(l1)+"_"+str(l2))

env = mc.MountainCarEnv(logger)
a_c = DDPG(env,
           actor=simple_actor_network(2, 1, l1_size=l1, l2_size=l2,
                                      learning_rate=0.005),
           critic=simple_critic_network(2, 1, l1_size=20, l2_size=10,
                                        learning_rate=0.01))

def draw_politic():
    # Visualize the learned policy over a 200x200 grid of
    # (position, velocity) states.
    plt.close()
    ac = a_c
    img = np.zeros((200, 200))
    pos = -1.
    batch = []
    for i in range(200):
        vel = -1.
        pos += 0.01
        for j in range(200):
            vel += 0.01
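
The snippet cuts off mid-scan; a self-contained sketch of the same grid-visualization idea, with a stub policy standing in for the trained actor (all names below are assumptions):

import numpy as np
import matplotlib.pyplot as plt

def policy(state):
    # Stub for the trained actor; the real script would query the
    # DDPG actor on each state instead.
    pos, vel = state
    return np.tanh(2.0 * vel + pos)

def draw_policy_heatmap(n=200):
    # Evaluate the policy on an n x n grid of (position, velocity)
    # states spanning [-1, 1) on both axes, then render it as an image.
    img = np.zeros((n, n))
    coords = np.linspace(-1.0, 1.0, n, endpoint=False)
    for i, pos in enumerate(coords):
        for j, vel in enumerate(coords):
            img[j, i] = policy((pos, vel))
    plt.imshow(img, origin='lower', extent=[-1, 1, -1, 1], aspect='auto')
    plt.xlabel('position')
    plt.ylabel('velocity')
    plt.colorbar(label='action')
    plt.show()

draw_policy_heatmap()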