def restore(self, sess, restore_from=None):
    if restore_from is None:
        restore_from = self._restore_path
    if self.print_log:
        print_and_log('saver: {} restoring model from {}'.format(
            self.name, restore_from))
    self._saver.restore(sess, restore_from)
def collect_test_data(self, top_level, is_challenging=False):
    if is_challenging:
        start_goal_pairs = self.hard_fixed_start_goal_pairs
    else:
        start_goal_pairs = self.fixed_start_goal_pairs
    print_and_log('collecting {} test episodes of level {}'.format(
        len(start_goal_pairs), top_level))
    return self.collect_data(start_goal_pairs, top_level, False)
def collect_data(self, start_goal_pairs, top_level, is_train):
    episode_results = self.episode_runner.play_episodes(
        start_goal_pairs, top_level, is_train)
    successes, accumulated_cost, dataset, endpoints_by_path = self._process_episode_results(
        episode_results, top_level)
    print_and_log(
        'data collection done, success rate is {}, accumulated cost is {}'.format(
            successes, accumulated_cost))
    return successes, accumulated_cost, dataset, endpoints_by_path
def save(self, sess, global_step):
    self.assertion_w_dictionary = self.get_assertion_vars(sess)
    self._restore_path = self._saver.save(
        sess, os.path.join(self.saver_dir, self.name), global_step=global_step)
    if self.print_log:
        print_and_log('saver {}: saved model from global step {} to {}'.format(
            self.name, global_step, self._restore_path))
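# The two ModelSaver methods above form a checkpoint-then-roll-back pair:
# save() records the checkpoint path, and restore() with no argument returns
# to that path. A minimal usage sketch, assuming a live tf.compat.v1.Session
# and the constructor signature used in run_for_config below (directory,
# number of checkpoints to keep, name, and the variable list to track):
best_saver = ModelSaver(os.path.join(saver_dir, 'best_model'), 1, 'best',
                        variables=network_variables)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    # ... train for a while ...
    best_saver.save(sess, global_step=100)   # checkpoint the current weights
    # ... training later degrades ...
    best_saver.restore(sess)                  # roll back to the saved checkpoint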
def policy_function(starts, goals, level, is_train):
    res = network.predict_policy(starts, goals, level, sess, is_train)
    means = 0.5 * (np.array(starts) + np.array(goals))
    distance = np.linalg.norm(res[0] - means, axis=1)
    print(
        f'distance from mean: mean {distance.mean()} min {distance.min()} max {distance.max()}'
    )
    if np.any(np.isnan(res)):
        print_and_log('######################## Nan predictions detected...')
    return res
def collect_data(self, count, is_train=True, use_fixed_start_goal_pairs=False):
    print_and_log('collecting {} {} episodes'.format(
        count, 'train' if is_train else 'test'))
    if use_fixed_start_goal_pairs:
        episode_results = self.episode_runner.play_fixed_episodes(is_train)
    else:
        episode_results = self.episode_runner.play_random_episodes(count, is_train)
    successes, accumulated_cost, dataset, endpoints_by_path = self._process_episode_results(
        episode_results)
    print_and_log(
        'data collection done, success rate is {}, accumulated cost is {}'.format(
            successes, accumulated_cost))
    if is_train:
        self.train_episodes_counter += len(endpoints_by_path)
    return successes, accumulated_cost, dataset, endpoints_by_path
def end_of_level_test(best_cost, best_cost_global_step, best_curriculum_coefficient,
                      best_saver, sess, test_trajectories_dir, trainer, current_level):
    print_and_log('end of run. best: {} from step: {}'.format(
        best_cost, best_cost_global_step))
    restore_best(sess, best_saver, best_curriculum_coefficient, trainer)
    # test all
    test_trajectories_file = os.path.join(test_trajectories_dir, 'all.txt')
    endpoints_by_path = trainer.collect_test_data(current_level, is_challenging=False)[-1]
    serialize_compress(endpoints_by_path, test_trajectories_file)
    print_and_log(os.linesep)
    # test hard
    test_trajectories_file = os.path.join(test_trajectories_dir, 'challenging.txt')
    endpoints_by_path = trainer.collect_test_data(current_level, is_challenging=True)[-1]
    serialize_compress(endpoints_by_path, test_trajectories_file)
    print_and_log(os.linesep)
def run_for_config(config):
    # set the name of the model
    model_name = config['general']['name']
    now = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    model_name = now + '_' + model_name if model_name is not None else now

    # where we save all the outputs
    scenario = config['general']['scenario']
    working_dir = os.path.join(get_base_directory(), 'sgt', scenario)
    init_dir(working_dir)
    saver_dir = os.path.join(working_dir, 'models', model_name)
    init_dir(saver_dir)
    init_log(log_file_path=os.path.join(saver_dir, 'log.txt'))
    copy_config(config, os.path.join(saver_dir, 'config.yml'))
    episodic_success_rates_path = os.path.join(saver_dir, 'results.txt')
    test_trajectories_dir = os.path.join(working_dir, 'test_trajectories', model_name)
    init_dir(test_trajectories_dir)

    # generate game
    game = _get_game(config)

    network = Network(config, game)
    network_variables = network.get_all_variables()

    # save model
    latest_saver = ModelSaver(os.path.join(saver_dir, 'latest_model'), 2, 'latest',
                              variables=network_variables)
    best_saver = ModelSaver(os.path.join(saver_dir, 'best_model'), 1, 'best',
                            variables=network_variables)

    summaries_collector = SummariesCollector(
        os.path.join(working_dir, 'tensorboard', model_name), model_name)

    with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                per_process_gpu_memory_fraction=config['general']['gpu_usage']))) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        def policy_function(starts, goals, level, is_train):
            res = network.predict_policy(starts, goals, level, sess, is_train)
            means = 0.5 * (np.array(starts) + np.array(goals))
            distance = np.linalg.norm(res[0] - means, axis=1)
            print(
                f'distance from mean: mean {distance.mean()} min {distance.min()} max {distance.max()}'
            )
            if np.any(np.isnan(res)):
                print_and_log('######################## Nan predictions detected...')
            return res

        episode_runner = EpisodeRunnerSubgoal(config, game, policy_function)

        trainer = TrainerSubgoal(model_name, config, working_dir, network, sess,
                                 episode_runner, summaries_collector,
                                 curriculum_coefficient=get_initial_curriculum(config))

        decrease_learn_rate_if_static_success = config['model']['decrease_learn_rate_if_static_success']
        stop_training_after_learn_rate_decrease = config['model']['stop_training_after_learn_rate_decrease']
        reset_best_every = config['model']['reset_best_every']

        global_step = 0
        best_curriculum_coefficient = None

        for current_level in range(config['model']['starting_level'],
                                   config['model']['levels'] + 1):
            best_cost, best_cost_global_step = None, None
            no_test_improvement, consecutive_learn_rate_decrease = 0, 0

            if config['model']['init_from_lower_level'] and current_level > 1:
                print_and_log('initiating level {} from previous level'.format(current_level))
                network.init_policy_from_lower_level(sess, current_level)

            for cycle in range(config['general']['training_cycles_per_level']):
                print_and_log('starting cycle {}, level {}'.format(cycle, current_level))

                new_global_step, success_ratio = trainer.train_policy_at_level(
                    current_level, global_step)
                if new_global_step == global_step:
                    print_and_log('no data found in training cycle {} global step still {}'.format(
                        cycle, global_step))
                    continue
                else:
                    global_step = new_global_step

                if (cycle + 1) % config['policy']['decrease_std_every'] == 0:
                    network.decrease_base_std(sess, current_level)
                    print_and_log('new base stds {}'.format(
                        network.get_base_stds(sess, current_level)))

                print_and_log('done training cycle {} global step {}'.format(cycle, global_step))

                # save every now and then
                if cycle % config['general']['save_every_cycles'] == 0:
                    latest_saver.save(sess, global_step=global_step)

                if cycle % config['general']['test_frequency'] == 0:
                    # do test
                    test_successes, test_cost, _, endpoints_by_path = trainer.collect_test_data(
                        current_level, False)
                    summaries_collector.write_test_success_summaries(
                        sess, global_step, test_successes, test_cost,
                        trainer.curriculum_coefficient)
                    with open(episodic_success_rates_path, 'a') as f:
                        f.write('{} {} {} {} {}'.format(
                            current_level, trainer.train_episodes_counter,
                            test_successes, test_cost, os.linesep))

                    # decide how to act next
                    print_and_log('old cost was {} at step {}'.format(
                        best_cost, best_cost_global_step))
                    print_and_log('current learn rates {}'.format(
                        network.get_learn_rates(sess, current_level)))
                    print_and_log('current base stds {}'.format(
                        network.get_base_stds(sess, current_level)))

                    if best_cost is None or test_cost < best_cost:
                        print_and_log('new best cost {} at step {}'.format(
                            test_cost, global_step))
                        best_cost, best_cost_global_step = test_cost, global_step
                        best_curriculum_coefficient = trainer.curriculum_coefficient
                        no_test_improvement, consecutive_learn_rate_decrease = 0, 0
                        best_saver.save(sess, global_step)
                        test_trajectories_file = os.path.join(
                            test_trajectories_dir, '{}.txt'.format(global_step))
                        serialize_compress(endpoints_by_path, test_trajectories_file)
                    else:
                        print_and_log('new model is not the best with cost {} at step {}'.format(
                            test_cost, global_step))
                        no_test_improvement += 1
                        print_and_log('no improvement count {} of {}'.format(
                            no_test_improvement, decrease_learn_rate_if_static_success))
                        if reset_best_every > 0 and no_test_improvement % reset_best_every == reset_best_every - 1:
                            # restore the model every once in a while if did not find a better solution in a while
                            restore_best(sess, best_saver, best_curriculum_coefficient, trainer)
                        if no_test_improvement == decrease_learn_rate_if_static_success:
                            # restore the best model
                            if config['model']['restore_on_decrease']:
                                restore_best(sess, best_saver, best_curriculum_coefficient, trainer)
                            # decrease learn rates
                            network.decrease_learn_rates(sess, current_level)
                            no_test_improvement = 0
                            consecutive_learn_rate_decrease += 1
                            print_and_log('decreasing learn rates {} of {}'.format(
                                consecutive_learn_rate_decrease,
                                stop_training_after_learn_rate_decrease))
                            print_and_log('new learn rates {}'.format(
                                network.get_learn_rates(sess, current_level)))
                            if consecutive_learn_rate_decrease == stop_training_after_learn_rate_decrease:
                                break

                if trainer.curriculum_coefficient is not None:
                    if success_ratio > config['curriculum']['raise_when_train_above']:
                        print_and_log('current curriculum coefficient {}'.format(
                            trainer.curriculum_coefficient))
                        trainer.curriculum_coefficient *= config['curriculum']['raise_times']
                        print_and_log('curriculum coefficient raised to {}'.format(
                            trainer.curriculum_coefficient))

                # mark in log the end of cycle
                print_and_log(os.linesep)

            # if we finished because we ran out of cycles, we still need to make one more test
            end_of_level_test(best_cost, best_cost_global_step, best_curriculum_coefficient,
                              best_saver, sess, test_trajectories_dir, trainer, current_level)

        print_and_log('trained all levels - needs to stop')
        close_log()
        return best_cost
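# A sketch of the nested config dictionary that run_for_config above reads.
# Only keys actually accessed in the code are listed; every value shown here
# is an illustrative assumption, not the project's defaults.
example_config = {
    'general': {
        'name': 'my_experiment',        # assumed example name
        'scenario': 'point_robot',      # assumed example scenario
        'gpu_usage': 0.5,
        'training_cycles_per_level': 1000,
        'save_every_cycles': 10,
        'test_frequency': 5,
    },
    'model': {
        'starting_level': 1,
        'levels': 3,
        'init_from_lower_level': True,
        'decrease_learn_rate_if_static_success': 10,
        'stop_training_after_learn_rate_decrease': 5,
        'reset_best_every': 0,
        'restore_on_decrease': True,
    },
    'policy': {
        'decrease_std_every': 100,
    },
    'curriculum': {
        'raise_when_train_above': 0.9,
        'raise_times': 1.5,
    },
}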
def policy_function(starts, goals, is_train):
    res = network.predict_policy(starts, goals, sess, is_train)
    if np.any(np.isnan(res)):
        print_and_log('######################## Nan predictions detected...')
    return res
def collect_train_data(self, count, top_level):
    print_and_log('collecting {} train episodes of level {}'.format(count, top_level))
    start_goal_pairs = self.episode_runner.game.get_start_goals(
        count, self.curriculum_coefficient, get_free_states=True)
    return self.collect_data(start_goal_pairs, top_level, True)
def run_for_config(config):
    # set the name of the model
    model_name = config['general']['name']
    now = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
    model_name = now + '_' + model_name if model_name is not None else now

    # where we save all the outputs
    scenario = config['general']['scenario']
    working_dir = os.path.join(get_base_directory(), 'sequential', scenario)
    init_dir(working_dir)
    saver_dir = os.path.join(working_dir, 'models', model_name)
    init_dir(saver_dir)
    init_log(log_file_path=os.path.join(saver_dir, 'log.txt'))
    copy_config(config, os.path.join(saver_dir, 'config.yml'))
    episodic_success_rates_path = os.path.join(saver_dir, 'results.txt')
    weights_log_dir = os.path.join(saver_dir, 'weights_logs')
    init_dir(weights_log_dir)
    test_trajectories_dir = os.path.join(working_dir, 'test_trajectories', model_name)
    init_dir(test_trajectories_dir)

    # generate game
    game = _get_game(config)

    network = NetworkSequential(config, game.get_state_space_size(),
                                game.get_action_space_size(), is_rollout_agent=False)
    network_variables = network.get_all_variables()

    # save model
    latest_saver = ModelSaver(os.path.join(saver_dir, 'latest_model'), 2, 'latest',
                              variables=network_variables)
    best_saver = ModelSaver(os.path.join(saver_dir, 'best_model'), 1, 'best',
                            variables=network_variables)

    summaries_collector = SummariesCollector(
        os.path.join(working_dir, 'tensorboard', model_name), model_name)

    with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
            gpu_options=tf.compat.v1.GPUOptions(
                per_process_gpu_memory_fraction=config['general']['gpu_usage']))) as sess:
        sess.run(tf.compat.v1.global_variables_initializer())

        episode_runner = EpisodeRunnerSequential(
            config, game, curriculum_coefficient=get_initial_curriculum(config))

        trainer = TrainerSequential(model_name, config, working_dir, network, sess,
                                    episode_runner, summaries_collector)

        decrease_learn_rate_if_static_success = config['model']['decrease_learn_rate_if_static_success']
        stop_training_after_learn_rate_decrease = config['model']['stop_training_after_learn_rate_decrease']
        reset_best_every = config['model']['reset_best_every']

        global_step = 0
        best_cost, best_cost_global_step, best_curriculum_coefficient = None, None, None
        no_test_improvement, consecutive_learn_rate_decrease = 0, 0

        for cycle in range(config['general']['training_cycles']):
            print_and_log('starting cycle {}'.format(cycle))

            global_step, success_ratio = trainer.train_policy(global_step)

            if (cycle + 1) % config['policy']['decrease_std_every'] == 0:
                network.decrease_base_std(sess)
                print_and_log('new base stds {}'.format(network.get_base_std(sess)))

            print_and_log('done training cycle {} global step {}'.format(cycle, global_step))

            # save every now and then
            if cycle % config['general']['save_every_cycles'] == 0:
                latest_saver.save(sess, global_step=global_step)

            if cycle % config['general']['test_frequency'] == 0:
                # do test
                test_successes, test_cost, _, endpoints_by_path = trainer.collect_data(
                    config['general']['test_episodes'], is_train=False,
                    use_fixed_start_goal_pairs=True)
                summaries_collector.write_test_success_summaries(
                    sess, global_step, test_successes, test_cost,
                    episode_runner.curriculum_coefficient)
                with open(episodic_success_rates_path, 'a') as f:
                    f.write('{} {} {} {}'.format(trainer.train_episodes_counter,
                                                 test_successes, test_cost, os.linesep))

                # decide how to act next
                print_and_log('old cost was {} at step {}'.format(
                    best_cost, best_cost_global_step))
                print_and_log('current learn rates {}'.format(network.get_learn_rate(sess)))
                print_and_log('current base stds {}'.format(network.get_base_std(sess)))

                if best_cost is None or test_cost < best_cost:
                    print_and_log('new best cost {} at step {}'.format(test_cost, global_step))
                    best_cost, best_cost_global_step = test_cost, global_step
                    best_curriculum_coefficient = episode_runner.curriculum_coefficient
                    no_test_improvement = 0
                    consecutive_learn_rate_decrease = 0
                    best_saver.save(sess, global_step)
                    test_trajectories_file = os.path.join(
                        test_trajectories_dir, '{}.txt'.format(global_step))
                    serialize_compress(endpoints_by_path, test_trajectories_file)
                else:
                    print_and_log('new model is not the best with cost {} at step {}'.format(
                        test_cost, global_step))
                    no_test_improvement += 1
                    print_and_log('no improvement count {} of {}'.format(
                        no_test_improvement, decrease_learn_rate_if_static_success))
                    if reset_best_every > 0 and no_test_improvement % reset_best_every == reset_best_every - 1:
                        # restore the model every once in a while if did not find a better solution in a while
                        best_saver.restore(sess)
                        episode_runner.curriculum_coefficient = best_curriculum_coefficient
                    if no_test_improvement == decrease_learn_rate_if_static_success:
                        # restore the best model
                        if config['model']['restore_on_decrease']:
                            best_saver.restore(sess)
                            episode_runner.curriculum_coefficient = best_curriculum_coefficient
                        network.decrease_learn_rates(sess)
                        no_test_improvement = 0
                        consecutive_learn_rate_decrease += 1
                        print_and_log('decreasing learn rates {} of {}'.format(
                            consecutive_learn_rate_decrease,
                            stop_training_after_learn_rate_decrease))
                        print_and_log('new learn rates {}'.format(network.get_learn_rate(sess)))
                        if consecutive_learn_rate_decrease == stop_training_after_learn_rate_decrease:
                            print_and_log('needs to stop')
                            best_saver.restore(sess)
                            break

            if episode_runner.curriculum_coefficient is not None:
                if success_ratio > config['curriculum']['raise_when_train_above']:
                    print_and_log('current curriculum coefficient {}'.format(
                        episode_runner.curriculum_coefficient))
                    episode_runner.curriculum_coefficient *= config['curriculum']['raise_times']
                    print_and_log('curriculum coefficient raised to {}'.format(
                        episode_runner.curriculum_coefficient))

            # mark in log the end of cycle
            print_and_log(os.linesep)

        print_and_log('end of run best: {} from step: {}'.format(
            best_cost, best_cost_global_step))
        print_and_log('testing on a new set of start-goal pairs')
        best_saver.restore(sess)
        test_trajectories_file = os.path.join(test_trajectories_dir, '-1.txt')
        endpoints_by_path = trainer.collect_data(
            config['general']['test_episodes'], is_train=False,
            use_fixed_start_goal_pairs=True)[-1]
        serialize_compress(endpoints_by_path, test_trajectories_file)

        close_log()
        return best_cost
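# Hypothetical driver sketch: run_for_config expects a nested dict like the
# example_config sketch above, and the scripts copy the config back out as
# config.yml, so loading it from a YAML file is a natural entry point. The
# file name 'config.yml' and the use of PyYAML here are assumptions, not part
# of the code above.
import yaml

if __name__ == '__main__':
    with open('config.yml') as config_file:
        config = yaml.safe_load(config_file)
    run_for_config(config)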