def start(self):
    weights_filename = '{}/dqn_weights/dqn_{}_weights.h5f'.format(
        self.cwd, self.env_name)
    if self.weights:
        self.agent.load_weights(weights_filename)
        print('...loading weights for {}'.format(self.env_name))
    if self.train:
        checkpoint_weights_filename = 'dqn_' + self.env_name + \
            '_weights_{step}.h5f'
        checkpoint_weights_filename = '{}/dqn_weights/'.format(self.cwd) + \
            checkpoint_weights_filename
        log_filename = '{}/dqn_weights/dqn_{}_log.json'.format(
            self.cwd, self.env_name)
        print('FileLogger: {}'.format(log_filename))
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        print('Starting training...')
        self.agent.fit(self.env,
                       callbacks=callbacks,
                       nb_steps=self.number_of_training_steps,
                       log_interval=10000,
                       verbose=0,
                       visualize=self.visualize)
        print('Saving AGENT weights...')
        self.agent.save_weights(weights_filename, overwrite=True)
    else:
        print('Starting TEST...')
        self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
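# A minimal sketch of the shared imports the snippets in this section assume;
# all of these names exist in keras-rl (the `rl` package). Snippet-specific
# helpers (environments, model builders, custom processors and callbacks) are
# defined elsewhere in their respective projects.
from rl.agents.dqn import DQNAgent
from rl.callbacks import FileLogger, ModelIntervalCheckpoint
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy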
def train_model(seed=1, setup=0):
    np.random.seed(seed)
    if setup == 0:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0)
    elif setup == 1:
        env = CameraControlEnv(a_p=0, a_r=0, e_thres=0.8)
    else:
        env = CameraControlEnv(a_p=0.5, a_r=0.2, e_thres=0.8)
    env.seed(seed)

    model = define_model(actions=7)
    memory = SequentialMemory(limit=10000, window_length=1)
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1.0, value_min=0.1,
                                  value_test=0.05, nb_steps=95000)
    dqn = DQNAgent(model=model, nb_actions=7, policy=policy, memory=memory,
                   processor=None, nb_steps_warmup=500, gamma=0.95,
                   delta_clip=1, target_model_update=0.001, batch_size=32)
    dqn.compile(RMSprop(lr=.0001), metrics=['mae'])

    log_filename = 'results/drone_camera_control_log_' + str(setup) + '.json'
    model_checkpoint_filename = 'results/drone_camera_cnn_weights_' + str(setup) + '_{step}.model'
    callbacks = [ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)]
    callbacks += [FileLogger(log_filename, interval=1)]

    dqn.fit(env, nb_steps=100000, nb_max_episode_steps=100, verbose=2,
            visualize=False, log_interval=1, callbacks=callbacks)

    # After training is done, save the final weights.
    model_filename = 'models/drone_camera_cnn_' + str(setup) + '.model'
    dqn.save_weights(model_filename, overwrite=True)
def train(self):
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    weights_filename = self.get_weight_path(self.stock)
    checkpoint_weights_filename = self.get_weight_path(
        self.stock) + '_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(self.stock)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    # callbacks += [WandbLogger(
    #     project="stock-bot-v0"
    # )]
    self.dqn.fit(self.env,
                 callbacks=callbacks,
                 nb_steps=1750000,
                 log_interval=10000)

    # After training is done, we save the final weights one more time.
    self.dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    self.dqn.test(self.env, nb_episodes=10, visualize=False)
def start(self):
    if self.train:
        weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
        if self.weights:
            self.agent.load_weights(weights_filename)
            print('...loading weights for {}'.format(self.env_id))
        checkpoint_weights_filename = 'dqn_' + self.env_id + '_weights_{step}.h5f'
        log_filename = 'dqn_{}_log.json'.format(self.env_id)
        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]
        # This class is itself the environment, so it is passed to fit/test.
        self.agent.fit(self, callbacks=callbacks,
                       nb_steps=self.training_steps, log_interval=10000)
        self.agent.save_weights(weights_filename, overwrite=True)
        self.agent.test(self, nb_episodes=2, visualize=False)
        self.render()
    else:
        weights_filename = 'dqn_{}_weights.h5f'.format(self.env_id)
        self.agent.load_weights(weights_filename)
        self.agent.test(self, nb_episodes=2, visualize=False)
        self.render()
def build_callbacks(env_name):
    checkpoint_weights_filename = 'dqn_' + env_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
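# A usage sketch for build_callbacks (hypothetical wiring: `dqn` is a compiled
# keras-rl agent and `env` a gym environment, both defined elsewhere; the
# environment name is an example):
callbacks = build_callbacks('CartPole-v0')
dqn.fit(env, nb_steps=50000, callbacks=callbacks, log_interval=10000)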
def build_callbacks(env_name):
    checkpoint_weights_filename = 'results/Swimmer/ddpg_' + env_name + '_weights_{step}.h5f'
    log_filename = 'results/Swimmer6/exp_5/ddpg_{}_log.json'.format(env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=5000)]
    return callbacks
def train(index, policy_nb_steps, fit_nb_steps):
    # Get the environment and extract the number of actions.
    print("Using environment", environment_name)
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.shape[0]

    # Build the model.
    v_model, mu_model, l_model = build_models(
        (WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    v_model.summary()
    mu_model.summary()
    l_model.summary()

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = CarRacingProcessor()
    random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3,
                                              size=nb_actions)
    agent = NAFAgent(nb_actions=nb_actions,
                     V_model=v_model,
                     L_model=l_model,
                     mu_model=mu_model,
                     memory=memory,
                     nb_steps_warmup=100,
                     random_process=random_process,
                     gamma=.99,
                     target_model_update=1e-3,
                     processor=processor)
    agent.compile(optimizers.Adam(lr=.001, clipnorm=1.), metrics=['mae'])

    weights_filename = 'naf_{}_{}_weights.h5f'.format(environment_name, index)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'naf_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'naf_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    agent.fit(environment,
              callbacks=callbacks,
              # nb_steps=1750000,
              nb_steps=fit_nb_steps,
              log_interval=10000,
              visualize="visualize" in sys.argv)

    # After training is done, we save the final weights one more time.
    agent.save_weights(weights_filename, overwrite=True)
def build_callbacks(env_name, filename_pre, filename_exp):
    checkpoint_weights_filename = filename_pre + env_name + '_weights_{step}.h5f'
    log_filename = filename_pre + filename_exp + '/ddpg_{}_log.json'.format(
        env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=50000)
    ]
    callbacks += [FileLogger(log_filename, interval=50000)]
    return callbacks
def train(self):
    # `checkpoint_weights_filename` and `log_filename`, like the interval
    # constants, are assumed to be defined at module level.
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename,
                                interval=INTERVAL_CALLBACK),
        FileLogger(log_filename, interval=FILE_LOGGER_INTERVAL)
    ]
    self.dqn.fit(self.env,
                 callbacks=callbacks,
                 nb_steps=NB_STEPS,
                 log_interval=FIT_LOG_INTERVAL)
def __init__(self, args):
    """Initialize.

    :param args: namespace of arguments; see --help.
    """
    # Params
    self.resuming = args.cont is not None
    self.initialize_from = args.cont
    self._log_interval = args.log_frequency
    self._max_episode_steps = args.max_episode_steps

    # Dirs
    model_path, log_path = tools.prepare_directories(
        "agent", args.env, resuming=self.resuming, args=args)
    log_filename = "log.json"
    self.log_file = os.path.join(log_path, log_filename)

    # Environment
    self.env = gym.make(args.env)
    self.env_name = args.env

    # Repeatability
    if args.deterministic:
        if "Deterministic" not in self.env_name:
            raise ValueError(
                "--deterministic only works with deterministic"
                " environments")
        self.env.seed(30013)
        np.random.seed(30013)
        tf.random.set_seed(30013)

    # Agent
    self.kerasrl_agent, self.atari_agent = self.build_agent(
        tools.Namespace(args, training=True))

    # Tools
    self.saver = CheckpointSaver(agent=self.kerasrl_agent, path=model_path,
                                 interval=args.saves)
    self.logger = TensorboardLogger(logdir=log_path, agent=self.atari_agent)

    # Callbacks
    self.callbacks = [
        self.saver,
        self.logger,
        FileLogger(filepath=self.log_file, interval=100),
    ]
    if args.random_epsilon:
        self.callbacks.append(self.kerasrl_agent.test_policy.callback)

    # Save on exit
    tools.QuitWithResources.add("last_save", lambda: self.saver.save())
def build_callbacks(): """ callbacks for the deep q agent """ checkpoint_weights_filename = 'model_checkpoints/dqn_weights_.h5f' log_filename = 'dqn_log.json' callbacks = [ ModelIntervalCheckpoint(checkpoint_weights_filename, interval=500) ] callbacks += [FileLogger(log_filename, interval=100)] return callbacks
def start(self) -> None:
    """
    Entry point for agent training and testing

    :return: (void)
    """
    output_directory = os.path.join(self.cwd, 'dqn_weights')
    if not os.path.exists(output_directory):
        LOGGER.info('{} does not exist. Creating Directory.'.format(
            output_directory))
        os.mkdir(output_directory)

    weight_name = 'dqn_{}_{}_weights.h5f'.format(self.env_name,
                                                 self.neural_network_type)
    weights_filename = os.path.join(output_directory, weight_name)
    LOGGER.info("weights_filename: {}".format(weights_filename))

    if self.load_weights:
        LOGGER.info('...loading weights for {} from\n{}'.format(
            self.env_name, weights_filename))
        self.agent.load_weights(weights_filename)

    if self.train:
        step_chkpt = '{step}.h5f'
        step_chkpt = 'dqn_{}_weights_{}'.format(self.env_name, step_chkpt)
        checkpoint_weights_filename = os.path.join(self.cwd, 'dqn_weights',
                                                   step_chkpt)
        LOGGER.info("checkpoint_weights_filename: {}".format(
            checkpoint_weights_filename))
        log_filename = os.path.join(
            self.cwd, 'dqn_weights', 'dqn_{}_log.json'.format(self.env_name))
        LOGGER.info('log_filename: {}'.format(log_filename))

        callbacks = [
            ModelIntervalCheckpoint(checkpoint_weights_filename,
                                    interval=250000)
        ]
        callbacks += [FileLogger(log_filename, interval=100)]

        LOGGER.info('Starting training...')
        self.agent.fit(self.env,
                       callbacks=callbacks,
                       nb_steps=self.number_of_training_steps,
                       log_interval=10000,
                       verbose=0,
                       visualize=self.visualize)
        LOGGER.info("training over.")
        LOGGER.info('Saving AGENT weights...')
        self.agent.save_weights(weights_filename, overwrite=True)
        LOGGER.info("AGENT weights saved.")
    else:
        LOGGER.info('Starting TEST...')
        self.agent.test(self.env, nb_episodes=2, visualize=self.visualize)
def run(self, steps):
    callbacks = [FileLogger(self.log_filename)]
    self.dqn.fit(
        self.env,
        callbacks=callbacks,
        nb_steps=steps,
        visualize=False,
        verbose=1,
        log_interval=10000,
    )
    # After training is done, we save the final weights.
    self.dqn.save_weights(self.weights_filename, overwrite=True)
def fit_dqn(fit_steps: int, cont: bool):
    training_env = gym.make('adver-v0', target_label=1, test_mode=False)
    callbacks = [ModelIntervalCheckpoint(DQN_WEIGHTS_FILENAME,
                                         interval=WEIGHTS_CHECKPOINT)]
    callbacks += [FileLogger(DQN_LOG_FILENAME, interval=LOG_CHECKPOINT)]
    dqn = init_dqn(fit_steps)
    if cont:
        dqn.load_weights(DQN_WEIGHTS_FILENAME.format(step="final"))
        print("continue fitting from last checkpoint")
    dqn.fit(training_env, callbacks=callbacks, nb_steps=fit_steps,
            start_step_policy=start_policy, nb_max_start_steps=3)
    dqn.save_weights(DQN_WEIGHTS_FILENAME.format(step="final"),
                     overwrite=True)
def build_callbacks(env_name):
    log_dir = 'logs'
    if not exists(log_dir):
        os.makedirs(log_dir)
    checkpoint_weights_filename = join(
        log_dir, 'dqn_' + env_name + '_weights_{step}.h5f')
    log_filename = join(log_dir, 'dqn_{}_log.json'.format(env_name))
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    return callbacks
def main(): """Build model and train on environment.""" env = MarketEnv(("ES", "FUT", "GLOBEX", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=3) #env = MarketEnv(("AAPL", "STK", "SMART", "USD"), obs_xform=xform.BinaryDelta(3), episode_steps=STEPS_PER_EPISODE, client_id=4) nb_actions = 3 # Keras-RL CEM is a discrete agent # Option 1 : Simple model model = Sequential([ Flatten(input_shape=(1,) + env.observation_space.shape), Dense(nb_actions), Activation('softmax') ]) # Option 2: deep network # hidden_nodes = reduce(operator.imul, env.observation_space.shape, 1) # model = Sequential([ # Flatten(input_shape=(1,) + env.observation_space.shape), # Dense(hidden_nodes), # Activation('relu'), # Dense(hidden_nodes), # Activation('relu'), # Dense(hidden_nodes), # Activation('relu'), # Dense(nb_actions), # Activation('softmax') # ]) print(model.summary()) param_logger = CEMParamLogger('cem_{}_params.json'.format(env.instrument.symbol)) callbacks = [ param_logger, FileLogger('cem_{}_log.json'.format(env.instrument.symbol), interval=STEPS_PER_EPISODE) ] theta_init = param_logger.read_params() # Start with last saved params if present if theta_init is not None: print('Starting with parameters from {}:\n{}'.format(param_logger.params_filename, theta_init)) memory = EpisodeParameterMemory(limit=EPISODES, window_length=1) # Remember the parameters and rewards for the last `limit` episodes. cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, batch_size=EPISODES, nb_steps_warmup=WARMUMP_EPISODES * STEPS_PER_EPISODE, train_interval=TRAIN_INTERVAL_EPISODES, elite_frac=0.2, theta_init=theta_init, processor=DiscreteProcessor(), noise_decay_const=0, noise_ampl=0) """ :param memory: Remembers the parameters and rewards for the last `limit` episodes. :param int batch_size: Randomly sample this many episode parameters from memory before taking the top `elite_frac` to construct the next gen parameters from. :param int nb_steps_warmup: Run for this many steps (total) to fill memory before training :param int train_interval: Train (update parameters) every this many episodes :param float elite_frac: Take this top fraction of the `batch_size` randomly sampled parameters from the episode memory to construct new parameters. """ cem.compile() cem.fit(env, nb_steps=STEPS_PER_EPISODE * EPISODES, visualize=True, verbose=2, callbacks=callbacks) cem.save_weights('cem_{}_weights.h5f'.format(env.instrument.symbol), overwrite=True)
def train(self, steps=50000, interval=100, visualise=False):
    callbacks = [
        ModelIntervalCheckpoint(self.checkpoint_path, interval=10),
        FileLogger(self.log_path, interval=100),
    ]
    self.dqn.fit(env=self.env,
                 nb_steps=steps,
                 visualize=visualise,
                 callbacks=callbacks,
                 log_interval=interval,
                 nb_max_episode_steps=self.env.total_steps - 1)
    self.dqn.save_weights(self.model_path, overwrite=True)
def train(self, env, steps, log_interval=5000):
    # Dueling and double DQN are configured on the DQNAgent constructor
    # (see the sketch below); keras-rl's fit() does not accept those kwargs.
    self.dqn.fit(
        env,
        callbacks=[FileLogger("dqn_log.json")],
        log_interval=log_interval,
        nb_steps=steps,
        verbose=1,
        visualize=False,
    )
    # After training is done, we save the final weights.
    self.dqn.save_weights("dqn_weights.h5f", overwrite=True)
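# keras-rl takes the dueling/double options on the DQNAgent constructor
# rather than on fit(); a minimal sketch, assuming `model`, `nb_actions`,
# `memory` and `policy` are defined elsewhere (hypothetical names):
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent

dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               policy=policy,
               enable_dueling_network=True,  # Enable dueling
               dueling_type="avg",
               enable_double_dqn=True)       # Enable double DQN
dqn.compile(Adam(lr=1e-3), metrics=['mae'])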
def trainDQN(cfg, env, dqn):
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that you can use the built-in Keras callbacks!
    callbacks = [
        ModelIntervalCheckpoint(cfg.checkpoint_weights_filename,
                                interval=250000)
    ]
    callbacks += [FileLogger(cfg.log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=cfg.nb_steps_dqn_fit,
            log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(cfg.weights_filename, overwrite=True)
def train(self, env):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1,
                                  value_test=.05, nb_steps=1000)
    amount_memory = 10000000
    memory = SequentialMemory(limit=amount_memory,
                              window_length=WINDOW_LENGTH)
    processor = EmptyProcessor()
    self.dqn = DQNAgent(model=self.model, nb_actions=nb_actions,
                        policy=policy, memory=memory, processor=processor,
                        nb_steps_warmup=50, gamma=.99,
                        target_model_update=10000, train_interval=4,
                        delta_clip=1., enable_double_dqn=False)
    self.dqn.compile(Adam(lr=0.01), metrics=['mae'])

    weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
    checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename,
                                         interval=2500)]
    callbacks += [FileLogger(log_filename, interval=100)]
    # self.dqn.load_weights(ENV_NAME + 'weights.h5f')
    self.dqn.fit(env, callbacks=callbacks, nb_steps=1750000,
                 log_interval=10000, visualize=True, verbose=2)

    # After training is done, we save the final weights one more time.
    self.dqn.save_weights(weights_filename, overwrite=True)
def train(self, env, nb_actions):
    weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
    checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=10000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    self.dqn.fit(env, callbacks=callbacks, nb_steps=1750000,
                 log_interval=10000, visualize=True, verbose=3)

    # After training is done, we save the final weights one more time.
    self.dqn.save_weights(weights_filename, overwrite=True)
def main():
    # Get the environment and extract the number of actions.
    environment_name = "lawnmower-medium-obstacles-v0"
    environment = gym.make(environment_name)
    environment.print_description()
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model_cnn((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Create sequential memory for memory replay.
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

    # Process environment inputs and outputs.
    processor = LawnmowerProcessor()

    # Use epsilon-greedy as our policy.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=int(STEPS * 0.8))

    # Instantiate and compile our agent.
    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy,
                   memory=memory, processor=processor, nb_steps_warmup=50000,
                   gamma=.99, target_model_update=10000, train_interval=4,
                   delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    # Set up some callbacks for training.
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename,
                                         interval=250000)]
    callbacks += [TensorboardCallback(os.path.join("tensorboard",
                                                   datetime_string))]
    callbacks += [FileLogger(log_filename, interval=100)]

    # Train the agent.
    dqn.fit(environment, callbacks=callbacks, nb_steps=STEPS,
            log_interval=10000)

    # Save the final network after training.
    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)
    dqn.save_weights(weights_filename, overwrite=True)

    # Run the agent.
    dqn.test(environment, nb_episodes=10, visualize=False)
def train_model(seed=1):
    np.random.seed(seed)
    env = CameraControlEnvCont()
    env.seed(seed)

    actor, critic, action_input = define_actor_critic_models(actions=3)
    memory = SequentialMemory(limit=10000, window_length=1)
    random_process = GaussianWhiteNoiseProcess(mu=0, sigma=0.1,
                                               sigma_min=0.01,
                                               n_steps_annealing=49000,
                                               size=3)
    agent = DDPGAgent(nb_actions=3, actor=actor, critic=critic,
                      critic_action_input=action_input, memory=memory,
                      nb_steps_warmup_critic=500, nb_steps_warmup_actor=500,
                      random_process=random_process, gamma=.1,
                      target_model_update=1e-3, batch_size=32)
    agent.compile([RMSprop(lr=.0001), RMSprop(lr=.01)], metrics=['mae'])

    log_filename = 'results/drone_camera_cont_control_log.json'
    model_checkpoint_filename = 'results/drone_camera_cont_cnn_weights_{step}.model'
    callbacks = [
        ModelIntervalCheckpoint(model_checkpoint_filename, interval=5000)
    ]
    callbacks += [FileLogger(log_filename, interval=1)]

    agent.fit(env, nb_steps=50000, nb_max_episode_steps=100, verbose=2,
              visualize=False, log_interval=1, callbacks=callbacks)
def train(self, max_steps=100, episodes=100):
    # Okay, now it's time to learn something! You can always safely abort the
    # training prematurely using Ctrl + C.
    self.env._max_steps = max_steps
    # for i in range(episodes):
    self.env.current_step = 0
    n_steps = max_steps * episodes
    logger = FileLogger(
        filepath='{}/{}.json'.format(self.out_path, self.ENV_NAME))
    self.dqn.fit(self.env,
                 nb_steps=n_steps,
                 nb_max_episode_steps=max_steps,
                 visualize=False,
                 verbose=1,
                 callbacks=[logger])
    # self.env.reset()

    # After training is done, we save the final weights.
    self.dqn.save_weights('{}/{}.h5'.format(self.out_path, self.ENV_NAME),
                          overwrite=True)
def create_dqn(model, log_interval=50000, model_name='dqn_agent_checkpoint',
               file_log_path='./logs/log.txt',
               tensorboard_path='./logs/tensorboard/'):
    model_path = './models/' + model_name + '.h5'
    file_logger = FileLogger(file_log_path, interval=log_interval)
    checkpoint = ModelIntervalCheckpoint(model_path, interval=log_interval)
    tensorboard = TensorboardLogger(tensorboard_path)
    callbacks = [file_logger, checkpoint, tensorboard]

    # Use the last 4 observations.
    history_length = 4
    memory = SequentialMemory(limit=500000, window_length=history_length)

    # Combine BoltzmannQPolicy and EpsGreedyQPolicy.
    policy = MaxBoltzmannQPolicy()
    # Start epsilon at 1.0 and decrease it on every step, so the agent stops
    # taking random actions once the map is explored.
    policy = LinearAnnealedPolicy(inner_policy=policy, attr='eps',
                                  value_max=1.0, value_min=0.1,
                                  value_test=0.04, nb_steps=NUMBER_OF_STEPS)

    # Create an instance of DQNAgent from keras-rl.
    dqn = DQNAgent(model=model, nb_actions=env.action_space.n, memory=memory,
                   policy=policy, processor=CustomProcessor(),
                   nb_steps_warmup=512, enable_dueling_network=True,
                   dueling_type='avg', target_model_update=5e2,
                   batch_size=32)
    dqn.compile(Adam(lr=5e-4), metrics=['mae'])
    return dqn, callbacks
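# A usage sketch for create_dqn, under the same assumptions the function
# itself makes (a module-level `env` and `NUMBER_OF_STEPS`, plus a Keras
# `model` built elsewhere); the final weights path is a hypothetical example:
dqn, callbacks = create_dqn(model)
dqn.fit(env, nb_steps=NUMBER_OF_STEPS, callbacks=callbacks, verbose=1)
dqn.save_weights('./models/dqn_agent_final.h5', overwrite=True)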
def train(self, env: Env, base_folder: str, nbr_steps: int = 1000):
    """
    Trains the agent

    :param env: The environment to train on
    :param base_folder: Folder to write the weights and the training log to
    :param nbr_steps: Number of training steps
    """
    timef = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    # Filenames
    weights_out_filename = 'dqn_weights_{t}_trained_{nbr}.h5f'.format(
        t=timef, nbr=nbr_steps)
    checkpoint_weights_filename = 'dqn_checkpoint_weights_' + timef + '_{step}.h5f'
    log_filename = 'trainlog_dqn_{}.json'.format(timef)

    # Full paths of the files
    template = '{folder}/{filename}'
    weights_out_file = template.format(folder=base_folder,
                                       filename=weights_out_filename)
    checkpoint_weights_file = template.format(
        folder=base_folder, filename=checkpoint_weights_filename)
    log_file = template.format(folder=base_folder, filename=log_filename)

    # Make sure the files/folders exist
    make_sure_path_exists(base_folder)
    for fn in [weights_out_file, log_file]:
        if not os.path.exists(fn):
            with open(fn, "w"):
                pass

    callbacks = [ModelIntervalCheckpoint(
        checkpoint_weights_file, interval=ceil(nbr_steps // 5))]  # 5 checkpoints
    callbacks += [FileLogger(
        log_file, interval=ceil(nbr_steps // 100))]  # update 100 times

    # Set a LinearAnnealedPolicy such that tau reaches its min value at 80% of nbr_steps
    self.agent.policy = LinearAnnealedPolicy(
        BoltzmannQPolicy(clip=(-500, 300)), attr='tau', value_max=0.8,
        value_min=0.3, value_test=0.01, nb_steps=ceil(nbr_steps * 0.8))
    # self.agent.policy = BoltzmannQPolicy(clip=(-500, 300), tau=0.1)

    self.agent.fit(env, nb_steps=nbr_steps, visualize=False, verbose=0,
                   nb_max_start_steps=0, callbacks=callbacks)

    logger.info("saving the weights to {}".format(weights_out_file))
    self.agent.save_weights(weights_out_file, overwrite=True)
def train(self, env):
    '''Trains the agent'''
    checkpoint_weights_filename = 'brain_weights_{step}.h5f'
    log_filename = 'brain_log.json'
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=25000),
        FileLogger(log_filename, interval=100)
    ]

    # Training our agent
    self.agent.fit(env, callbacks=callbacks, nb_steps=175000,
                   log_interval=10000,
                   verbose=0)  # just to remove the progress bar

    # After training is done, we save the final weights one more time.
    self.agent.save_weights(self.weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    self.agent.test(env, nb_episodes=10, visualize=False)
def train_agent(dqn, env, weights_filename):
    """Training."""
    checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename,
                                interval=CALLBACK_INTERVAL)
    ]
    callbacks += [FileLogger(log_filename, interval=LOG_INTERVAL)]
    dqn.fit(env, callbacks=callbacks, nb_steps=MAX_STEPS,
            log_interval=LOG_INTERVAL)
    dqn.save_weights(weights_filename, overwrite=True)
               target_model_update=10000, train_interval=4, delta_clip=1.)
dqn.compile(Adam(lr=.00025), metrics=['mae'])

if args.mode == 'train':
    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    weights_filename = '/content/gdrive/My Drive/dqn_{}_weights.h5f'.format(
        args.env_name)
    checkpoint_weights_filename = 'dqn_' + args.env_name + '_weights_{step}.h5f'
    log_filename = '/content/gdrive/My Drive/dqn_{}_log.json'.format(
        args.env_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(env, nb_episodes=10, visualize=False)
elif args.mode == 'test':
    weights_filename = '/content/gdrive/My Drive/dqn_{}_weights.h5f'.format(
        args.env_name)
    if args.weights:
        weights_filename = args.weights
    dqn.load_weights(weights_filename)
    dqn.test(env, nb_episodes=10, visualize=True)
def main():
    # Get the environment and extract the number of actions.
    environment_name = "FlappyBird-v0"
    environment = gym.make(environment_name)
    np.random.seed(666)
    nb_actions = environment.action_space.n

    # Build the model.
    model = build_model((WINDOW_LENGTH,) + INPUT_SHAPE, nb_actions)
    print(model.summary())

    # Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
    # even the metrics!
    memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
    processor = FlappyBirdProcessor()

    # Select a policy. We use eps-greedy action selection, which means that a random action is selected
    # with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that
    # the agent initially explores the environment (high eps) and then gradually sticks to what it knows
    # (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05
    # so that the agent still performs some random actions. This ensures that the agent cannot get stuck.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps',
                                  value_max=1., value_min=.1, value_test=.05,
                                  nb_steps=1000000)

    # The trade-off between exploration and exploitation is difficult and an on-going research topic.
    # If you want, you can experiment with the parameters or use a different policy. Another popular one
    # is Boltzmann-style exploration:
    # policy = BoltzmannQPolicy(tau=1.)
    # Feel free to give it a try!

    dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy,
                   memory=memory, processor=processor, nb_steps_warmup=50000,
                   gamma=.99, target_model_update=10000, train_interval=4,
                   delta_clip=1.)
    dqn.compile(optimizers.Adam(lr=.00025), metrics=['mae'])

    weights_filename = 'dqn_{}_weights.h5f'.format(environment_name)

    # Okay, now it's time to learn something! We capture the interrupt exception so that training
    # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks!
    checkpoint_weights_filename = 'dqn_' + environment_name + '_weights_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(environment_name)
    callbacks = [
        ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000)
    ]
    callbacks += [TensorboardCallback()]
    callbacks += [FileLogger(log_filename, interval=100)]
    dqn.fit(environment, callbacks=callbacks, nb_steps=1750000,
            log_interval=10000)

    # After training is done, we save the final weights one more time.
    dqn.save_weights(weights_filename, overwrite=True)

    # Finally, evaluate our algorithm for 10 episodes.
    dqn.test(environment, nb_episodes=10, visualize=False)