def main(args):
    np.random.seed(2019)

    # use a non-interactive backend when graphs are not shown on screen
    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)

    if args.save:
        # encode the key hyperparameters and boolean switches in the results directory name
        dir_variables = [
            SAVE_PREFIX, args.game, args.num_blocks, args.encoder_learning_rate,
            args.encoder_optimizer, args.num_steps
        ]
        dir_switches = ["no_sample", "only_one_q_value", "disable_batch_norm"]

        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = LearnExpectationContinuousLogger(save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()

    if args.game == constants.GAME_BREAKOUT:
        num_actions = 6
    else:
        raise ValueError("Unknown game.")

    runner = QRunnerMinAtar(
        args.load_path, num_actions, logger, saver, args.num_blocks, args.encoder_learning_rate,
        args.weight_decay, args.encoder_optimizer, args.num_steps,
        disable_batch_norm=args.disable_batch_norm, disable_softplus=args.disable_softplus,
        no_sample=args.no_sample, only_one_q_value=args.only_one_q_value,
        validation_freq=args.validation_freq, validation_fraction=args.validation_fraction,
        summaries=args.summaries, load_model_path=args.load_model_path,
        zero_sd_after_training=args.zero_sd_after_training
    )

    # train, optionally save the model, then evaluate
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
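# Entry-point sketch (assumption): the argument parser for this script is not
# shown in this section, so the block below only illustrates how main() could be
# driven from the command line. The flag names mirror, one-to-one, the `args`
# attributes read by main() above, but the types and defaults are guesses, not
# the project's actual CLI.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("load_path", help="path to the saved transitions dataset")
    parser.add_argument("--game", default="breakout")
    parser.add_argument("--num-blocks", type=int, default=1)
    parser.add_argument("--encoder-learning-rate", type=float, default=1e-4)
    parser.add_argument("--encoder-optimizer", default="adam")
    parser.add_argument("--num-steps", type=int, default=10000)
    parser.add_argument("--weight-decay", type=float, default=0.0)
    parser.add_argument("--validation-freq", type=int, default=1000)
    parser.add_argument("--validation-fraction", type=float, default=0.2)
    parser.add_argument("--load-model-path", default=None)
    parser.add_argument("--gpus", default=None)
    parser.add_argument("--disable-batch-norm", action="store_true")
    parser.add_argument("--disable-softplus", action="store_true")
    parser.add_argument("--no-sample", action="store_true")
    parser.add_argument("--only-one-q-value", action="store_true")
    parser.add_argument("--zero-sd-after-training", action="store_true")
    parser.add_argument("--summaries", action="store_true")
    parser.add_argument("--save", action="store_true")
    parser.add_argument("--save-model", action="store_true")
    parser.add_argument("--show-graphs", action="store_true")
    main(parser.parse_args())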
def main(args):
    np.random.seed(2019)

    # use a non-interactive backend when graphs are not shown on screen
    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)

    if args.save:
        # encode the key hyperparameters and boolean switches in the results directory name
        dir_variables = [
            SAVE_PREFIX.format(args.base_dir), args.num_pucks, args.grid_size, args.grid_size,
            args.num_blocks, args.num_components, args.beta0, args.beta1, args.beta2,
            args.encoder_learning_rate, args.encoder_optimizer, args.num_steps
        ]
        dir_switches = ["no_sample", "only_one_q_value", "gt_q_values", "disable_batch_norm"]

        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = LearnExpectationContinuousLogger(save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()

    runner = QGMMPriorRunner(
        args.load_path, args.grid_size, args.num_pucks, logger, saver, args.num_blocks,
        args.num_components, args.hiddens, args.encoder_learning_rate, args.beta0, args.beta1,
        args.beta2, args.weight_decay, args.encoder_optimizer, args.num_steps,
        disable_batch_norm=args.disable_batch_norm, disable_softplus=args.disable_softplus,
        no_sample=args.no_sample, only_one_q_value=args.only_one_q_value,
        gt_q_values=args.gt_q_values, disable_resize=args.disable_resize,
        oversample=args.oversample, validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction, summaries=args.summaries,
        load_model_path=args.load_model_path, include_goal_states=args.include_goal_states,
        q_values_noise_sd=args.q_values_noise_sd, new_dones=args.new_dones
    )

    # train, optionally save the model, then evaluate
    runner.setup()
    runner.main_training_loop()

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    # these arguments have no usable defaults and must be specified
    assert args.active_indices is not None
    assert args.eval_indices is not None
    assert args.dones_index is not None

    np.random.seed(2019)

    # use a non-interactive backend when nothing is shown on screen
    if not args.show_graphs and not args.show_qs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)

    if args.save:
        # encode the key hyperparameters and boolean switches in the results directory name
        dir_variables = [
            SAVE_PREFIX.format(args.base_dir), args.num_pucks, args.grid_size, args.grid_size,
            args.num_blocks, args.num_components, args.beta0, args.beta1, args.beta2, args.beta3,
            args.encoder_learning_rate, args.encoder_optimizer, args.num_steps
        ]
        dir_switches = [
            "no_sample", "only_one_q_value", "disable_batch_norm", "train_prior",
            "post_train_prior", "post_train_t_and_prior", "post_train_hmm"
        ]

        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = LearnExpectationContinuousLogger(save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()

    runner = QHMMPriorMultiTask(
        args.load_path, args.grid_size, args.num_pucks, logger, saver, args.num_blocks,
        args.num_components, args.hiddens, args.encoder_learning_rate, args.beta0, args.beta1,
        args.beta2, args.weight_decay, args.encoder_optimizer, args.num_steps,
        args.active_indices, args.eval_indices, args.dones_index,
        disable_batch_norm=args.disable_batch_norm, disable_softplus=args.disable_softplus,
        no_sample=args.no_sample, only_one_q_value=args.only_one_q_value,
        oversample=args.oversample, validation_freq=args.validation_freq,
        validation_fraction=args.validation_fraction, summaries=args.summaries,
        load_model_path=args.load_model_path, beta3=args.beta3,
        post_train_prior=args.post_train_prior, post_train_hmm=args.post_train_hmm,
        post_train_t_and_prior=args.post_train_t_and_prior, save_gifs=args.save_gifs,
        hard_abstract_state=args.hard_abstract_state,
        zero_sd_after_training=args.zero_sd_after_training,
        prune_abstraction=args.prune_abstraction, prune_threshold=args.prune_threshold,
        old_bn_settings=args.old_bn_settings, include_goal_states=args.include_goal_states,
        shift_q_values=args.shift_q_values, soft_picture_goals=args.soft_picture_goals,
        goal_rewards_threshold=args.goal_rewards_threshold,
        q_values_indices=args.q_values_indices, fast_eval=args.fast_eval,
        sample_abstract_state=args.sample_abstract_state, softmax_policy=args.softmax_policy,
        softmax_policy_temp=args.softmax_policy_temp, show_qs=args.show_qs,
        fix_prior_training=args.fix_prior_training,
        model_learning_rate=args.model_learning_rate, random_shape=args.random_shape
    )

    # train, optionally post-train the HMM, save the model, then evaluate
    runner.setup()
    runner.main_training_loop()

    if args.post_train_hmm:
        runner.post_hmm_training_loop(args.post_train_hmm_steps)

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()
def main(args):
    np.random.seed(2019)

    # use a non-interactive backend when graphs are not shown on screen
    if not args.show_graphs:
        matplotlib.use("pdf")

    if args.gpus is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus

    saver = Saver(None)

    if args.save:
        # encode the key hyperparameters and boolean switches in the results directory name
        dir_variables = [
            SAVE_PREFIX.format(args.base_dir), args.game, args.num_blocks, args.num_components,
            args.beta0, args.beta1, args.beta2, args.beta3, args.encoder_learning_rate,
            args.encoder_optimizer, args.num_steps
        ]
        dir_switches = [
            "no_sample", "only_one_q_value", "disable_batch_norm", "train_prior",
            "post_train_prior", "post_train_t_and_prior", "post_train_hmm"
        ]

        saver.create_dir_name(SAVE_TEMPLATE, dir_variables, dir_switches, args)
        saver.create_dir(add_run_subdir=True)
        saver.save_by_print(args, "settings")

    logger = LearnExpectationContinuousLogger(save_file=saver.get_save_file("main", "log"), print_logs=True)
    logger.silence_tensorflow()

    # fixed number of actions for the MinAtar games used here
    num_actions = 6

    # the model learning rate defaults to the encoder learning rate unless overridden
    model_learning_rate = args.encoder_learning_rate
    if args.model_learning_rate is not None:
        model_learning_rate = args.model_learning_rate

    runner = QHMMPriorRunnerMinAtar(
        args.load_path, args.game, num_actions, logger, saver, args.num_blocks,
        args.num_components, args.encoder_learning_rate, args.beta0, args.beta1, args.beta2,
        args.weight_decay, args.encoder_optimizer, args.num_steps,
        disable_batch_norm=args.disable_batch_norm, disable_softplus=args.disable_softplus,
        no_sample=args.no_sample, only_one_q_value=args.only_one_q_value,
        validation_freq=args.validation_freq, validation_fraction=args.validation_fraction,
        summaries=args.summaries, load_model_path=args.load_model_path, beta3=args.beta3,
        post_train_prior=args.post_train_prior, post_train_hmm=args.post_train_hmm,
        post_train_t_and_prior=args.post_train_t_and_prior, save_gifs=args.save_gifs,
        zero_sd_after_training=args.zero_sd_after_training,
        hard_abstract_state=args.hard_abstract_state,
        freeze_hmm_no_entropy_at=args.freeze_hmm_no_entropy_at,
        cluster_predict_qs=args.cluster_predict_qs,
        cluster_predict_qs_weight=args.cluster_predict_qs_weight,
        prune_threshold=args.prune_threshold, prune_abstraction=args.prune_abstraction,
        prune_abstraction_new_means=args.prune_abstraction_new_means,
        sample_abstract_state=args.sample_abstract_state, softmax_policy=args.softmax_policy,
        softmax_policy_temp=args.softmax_policy_temp, discount=args.discount,
        fix_prior_training=args.fix_prior_training, model_learning_rate=model_learning_rate,
        q_scaling_factor=args.q_scaling_factor, eval_episodes=args.eval_episodes
    )

    # train, optionally post-train the HMM, save the model, then evaluate
    runner.setup()
    runner.main_training_loop()

    if args.post_train_hmm:
        runner.post_hmm_training_loop(args.post_train_hmm_steps)

    if args.save_model:
        runner.save_model()

    runner.evaluate_and_visualize()
    runner.close_model_session()