示例#1
0
 def run(env):
     """Call ``lola.train_pg.train`` on ``env``.

     All hyperparameters are free variables resolved from the surrounding
     scope when this function executes. The trainer's return value is
     discarded, so this function returns ``None``.
     """
     # Function-scope import: the trainer module is loaded only when run()
     # is actually invoked.
     from lola.train_pg import train as launch_training

     # Insertion order mirrors the keyword order of the training call.
     settings = {
         "num_episodes": num_episodes,
         "trace_length": trace_length,
         "batch_size": batch_size,
         "gamma": gamma,
         "lr": lr,
         "lr_correction": lr_correction,
         "corrections": lola,
         "simple_net": simple_net,
         "hidden": hidden,
     }
     launch_training(env, **settings)
示例#2
0
 def run(env):
     """Call ``lola.train_pg.train`` on ``env`` with ``set_zero`` fixed to 0.

     Apart from the literal ``set_zero=0``, every hyperparameter is a free
     variable resolved from the surrounding scope when this function
     executes. The trainer's return value is discarded, so this function
     returns ``None``.
     """
     # Function-scope import: the trainer module is loaded only when run()
     # is actually invoked.
     from lola.train_pg import train as launch_training

     # Insertion order mirrors the keyword order of the training call.
     settings = {
         "num_episodes": num_episodes,
         "trace_length": trace_length,
         "batch_size": batch_size,
         "gamma": gamma,
         "set_zero": 0,
         "lr": lr,
         "corrections": lola,
         "simple_net": simple_net,
         "hidden": hidden,
         "mem_efficient": mem_efficient,
     }
     launch_training(env, **settings)
示例#3
0
 def run(env):
     """Call ``lola.train_exact.train`` on ``env``.

     All hyperparameters are free variables resolved from the surrounding
     scope when this function executes; note that the outer ``hidden`` value
     is forwarded under the keyword ``num_hidden``. The trainer's return
     value is discarded, so this function returns ``None``.
     """
     # Function-scope import: the trainer module is loaded only when run()
     # is actually invoked.
     from lola.train_exact import train as launch_training

     # Insertion order mirrors the keyword order of the training call.
     settings = {
         "num_episodes": num_episodes,
         "trace_length": trace_length,
         "simple_net": simple_net,
         "corrections": lola,
         "pseudo": pseudo,
         "num_hidden": hidden,
         "reg": reg,
         "lr": lr,
         "lr_correction": lr_correction,
         "gamma": gamma,
     }
     launch_training(env, **settings)
示例#4
0
 def run(env):
     """Call ``lola.train_cg.train`` on ``env``.

     All hyperparameters are free variables resolved from the surrounding
     scope when this function executes. The trainer's return value is
     discarded, so this function returns ``None``.
     """
     # Function-scope import: the trainer module is loaded only when run()
     # is actually invoked.
     from lola.train_cg import train as launch_training

     # Insertion order mirrors the keyword order of the training call.
     settings = {
         "num_episodes": num_episodes,
         "trace_length": trace_length,
         "batch_size": batch_size,
         "bs_mul": bs_mul,
         "gamma": gamma,
         "grid_size": grid_size,
         "lr": lr,
         "corrections": lola,
         "opp_model": opp_model,
         "hidden": hidden,
         "mem_efficient": mem_efficient,
     }
     launch_training(env, **settings)
示例#5
0
 def run(env):
     """Call ``lola.train_cg_le.train`` on ``env`` with ``punish`` enabled.

     Apart from the literal ``punish=True``, every hyperparameter is a free
     variable resolved from the surrounding scope when this function
     executes; ``welfare_fn0`` and ``welfare_fn1`` are forwarded under the
     keywords ``welfare0`` and ``welfare1``. The trainer's return value is
     discarded, so this function returns ``None``.
     """
     # Function-scope import: the trainer module is loaded only when run()
     # is actually invoked.
     from lola.train_cg_le import train as launch_training

     # Insertion order mirrors the keyword order of the training call.
     settings = {
         "num_episodes": num_episodes,
         "trace_length": trace_length,
         "batch_size": batch_size,
         "bs_mul": bs_mul,
         "gamma": gamma,
         "grid_size": grid_size,
         "lr": lr,
         "corrections": lola,
         "opp_model": opp_model,
         "hidden": hidden,
         "welfare0": welfare_fn0,
         "welfare1": welfare_fn1,
         "punish": True,
         "mem_efficient": mem_efficient,
     }
     launch_training(env, **settings)