def main():
    trainer = make_trainer(
        'a3c',
        env_maker=lambda: make_env(*sys.argv[1:]),
        feature_maker=lambda o: make_feature(o, num_hid=100),
        state_to_input=state_to_input,
        num_parallel=12,
        train_steps=100000000,
        interval_save=100000,
        save_dir='output',
        catch_signal=True,
        verbose=True,
    )
    trainer.run()
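# make_env, make_feature, and state_to_input are assumed to be imported or
# defined above this snippet. Purely as a hypothetical sketch, a make_feature
# accepting the num_hid=100 keyword used above (modeled on the list-based
# variants in the other examples in this repo) could look like:
#
# def make_feature(observation_space, num_hid=100):
#     inp_state = Input(shape=observation_space.shape)
#     feature = Activation('relu')(Dense(num_hid)(inp_state))
#     return inp_state, feature

# Entry-point guard so the script actually invokes main() when run directly:
if __name__ == '__main__':
    main()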
if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='acer',
        env_maker=lambda: gym.make("EuroDolTrain-v0"),
        model_maker=make_model,  # make_model and PATH_TO_MODEL assumed defined above
        num_parallel=1,
        train_steps=1000,
        online_learning=False,
        verbose=True,
        batch_size=1,
        save_dir=PATH_TO_MODEL,
    )
    trainer.run()
import gym
from drlbox.trainer import make_trainer
from drlbox.evaluator import make_evaluator
# from baselines import deepq
from tesr import *  # assumed to also provide Input, Dense, and Activation


def callback(lcl, _glb):
    return False


def make_feature(observation_space, num_hid_list):
    inp_state = Input(shape=observation_space.shape)
    print('\n observation space:', observation_space)
    feature = inp_state
    for num_hid in num_hid_list:
        feature = Dense(num_hid)(feature)
        feature = Activation('relu')(feature)
    return inp_state, feature


if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='impala',
        env_maker=lambda: gym.make("EuroDolTrain-v0"),
        feature_maker=lambda obs_space: make_feature(obs_space, [200, 100]),
        num_parallel=1,
        train_steps=2000,
        verbose=True,
        batch_size=2,
        save_dir="dir",
    )
    trainer.run()
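# make_evaluator is imported above but never used in this script; a follow-up
# evaluation run could look like the sketch below. The "EuroDolEval-v0" env id
# and the kwargs mirror the ACER example elsewhere in this repo; the exact
# saved-model filename under save_dir is an assumption.
#
# evaluator = make_evaluator(
#     algorithm='impala',
#     env_maker=lambda: gym.make("EuroDolEval-v0"),
#     load_model="dir/model.h5",  # hypothetical path; match your save_dir
#     num_episode=1,
#     verbose=True,
# )
# evaluator.run()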
''' DQN on Atari (default: Pong-v0) '''
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', type=str, default='Pong-v0')
    parser.add_argument('--nbsteps', type=int, default=1750000)
    parser.add_argument('--exp', choices=['eps', 'bq', 'bgq', 'leps', 'noisy'],
                        default='eps')
    args = parser.parse_args()
    ENV_NAME = args.envname
    POL = args.exp
    nb_steps = args.nbsteps
    trainer = make_trainer(
        'dqn',
        env_maker=lambda: make_env(ENV_NAME),
        model_maker=make_model,
        state_to_input=state_to_input,
        train_steps=nb_steps,
        rollout_maxlen=4,
        batch_size=32,
        verbose=True,
        dqn_double=False,
        noisynet='fg',
        num_parallel=2,
        replay_type='uniform',
        replay_kwargs=dict(maxlen=1000000),
    )
    trainer.run()
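# make_env, make_model, and state_to_input are assumed to be defined above
# this __main__ block. Purely as a hypothetical sketch (not drlbox's own
# implementation), the two simpler helpers might look like:
#
# import gym
# import numpy as np
#
# def make_env(name):
#     return gym.make(name)  # real Atari scripts add frame-skip/stack wrappers
#
# def state_to_input(state):
#     return np.asarray(state, dtype=np.float32) / 255.0  # scale uint8 pixels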
import gym
from tensorflow.keras.layers import Input, Dense, Activation
from drlbox.trainer import make_trainer

'''
Input arguments:
    observation_space: Observation space of the environment;
    num_hid_list:      List of hidden unit numbers in the fully-connected net.
'''
def make_feature(observation_space, num_hid_list):
    inp_state = Input(shape=observation_space.shape)
    feature = inp_state
    for num_hid in num_hid_list:
        feature = Dense(num_hid)(feature)
        feature = Activation('relu')(feature)
    return inp_state, feature


''' A3C on CartPole-v0 '''
if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='a3c',
        env_maker=lambda: gym.make('CartPole-v0'),
        feature_maker=lambda obs_space: make_feature(obs_space, [200, 100]),
        num_parallel=1,
        train_steps=1000,
        verbose=True,
    )
    trainer.run()
if __name__ == '__main__':
    trainer = make_trainer(
        algorithm='acer',
        env_maker=lambda: gym.make("EuroDolTrain-v0"),
        feature_maker=lambda obs_space: make_feature(obs_space, [200, 100]),
        num_parallel=1,
        train_steps=1000,
        online_learning=False,
        verbose=True,
        batch_size=1,
        save_dir="F:\\model.h5"
    )
    trainer.run()

    evaluator = make_evaluator(
        env_maker=lambda: gym.make("EuroDolEval-v0"),
        render_timestep=1,
        load_model="F:\\dir\\model.h5",
        render_end=False,
        num_episode=1,
        algorithm='acer',
        verbose=True,
    )
    evaluator.run()
    # ... (earlier layers of make_model: input placeholder and first two convs)
    conv3 = layers.Conv2D(64, (3, 3), strides=(1, 1))(conv2)
    conv3 = layers.Activation('relu')(conv3)
    conv_flat = layers.Flatten()(conv3)
    feature = layers.Dense(512)(conv_flat)
    feature = layers.Activation('relu')(feature)

    # actor (policy) and critic (value) streams
    size_logits = size_value = env.action_space.n
    logits_init = initializers.RandomNormal(stddev=1e-3)
    logits = layers.Dense(size_logits, kernel_initializer=logits_init)(feature)
    value = layers.Dense(size_value)(feature)
    return models.Model(inputs=ph_state, outputs=[logits, value])


''' ACER on Breakout-v0 '''
if __name__ == '__main__':
    trainer = make_trainer(
        'acer',
        env_maker=lambda: make_env('Breakout-v0'),
        model_maker=make_model,
        state_to_input=state_to_input,
        num_parallel=1,
        train_steps=1000,
        rollout_maxlen=4,
        batch_size=8,
        verbose=True,
    )
    trainer.run()
'''
Input arguments:
    observation_space: Observation space of the environment;
    num_hid_list:      List of hidden unit numbers in the fully-connected net.
'''
def make_feature(observation_space, num_hid_list):
    inp_state = Input(shape=observation_space.shape)
    feature = inp_state
    for num_hid in num_hid_list:
        feature = Dense(num_hid)(feature)
        feature = Activation('relu')(feature)
    return inp_state, feature


if __name__ == '__main__':
    current_dir = os.getcwd()
    config_path = os.path.join(current_dir, "../configs/iris.config")
    os.environ["GYMFC_CONFIG"] = config_path
    env = gym.make('AttFC_GyroErr-MotorVel_M4_Con-v0')
    # env = RewScale(env, 0.1)
    trainer = make_trainer(
        algorithm='a3c',
        env_maker=lambda: env,  # see the note on env sharing below
        feature_maker=lambda obs_space: make_feature(obs_space, [64, 128, 64]),
        num_parallel=5,
        train_steps=1000,
        verbose=True,
    )
    trainer.run()
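# Note: env_maker above returns one pre-built env instance, which all 5
# parallel workers would then share. A safer pattern (assuming the gymfc env
# id is registered at import time) builds a fresh env per call:
#
# def env_maker():
#     return gym.make('AttFC_GyroErr-MotorVel_M4_Con-v0')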