import csv
import os
import traceback

import numpy as np

# Project-local names used below (DRLMimicConfig, Disentanglement,
# DRLDataGenerator, MimicLearner, return_data, data_builder, write_header,
# write_data_text, opts) come from this repository; their exact module paths
# are not shown in the source.


def run():
    # Pin the GPU used for training.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    game_name = 'Assault-v0'
    deg_type = 'CVAE'
    # model_number = 810000

    if game_name == 'Assault-v0':
        config_path = "../environment_settings/assault_v0_config.yaml"
    elif game_name == 'Breakout-v0':
        config_path = "../environment_settings/breakout_v0_config.yaml"
    elif game_name == 'SpaceInvaders-v0':
        config_path = "../environment_settings/space_invaders_v0_config.yaml"
    elif game_name == 'flappybird':
        config_path = "../environment_settings/flappybird_config.yaml"
    elif game_name == 'icehockey':
        config_path = '../environment_settings/icehockey_config.yaml'
    elif game_name == 'Enduro-v0':
        config_path = '../environment_settings/enduro_v0_config.yaml'
    # elif game_name == 'Enduro-v1':
    #     config_path = '../environment_settings/enduro_v1_config.yaml'
    else:
        raise ValueError("Unknown game name {0}".format(game_name))
    print("Running environment {0}".format(game_name))

    deg_config = DRLMimicConfig.load(config_path)

    # Resolve the data path for the current machine; local_test_flag points
    # everything at the small example dataset shipped with the repository.
    local_test_flag = False
    if local_test_flag:
        deg_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        deg_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError("Unknown running setting, please set up your own environment")

    # Train the selected disentangled-embedding generator (DEG).
    DEG = Disentanglement(config=deg_config, deg_type=deg_type,
                          global_model_data_path=global_model_data_path)
    if deg_type == 'CVAE':
        DEG.train_cvae()
    elif deg_type == 'VAE':
        DEG.train_fvae(apply_tc=False)
    elif deg_type == 'FVAE':
        DEG.train_fvae(apply_tc=True)
    elif deg_type == 'AAE':
        DEG.train_aae()
    else:
        raise ValueError('Unknown deg type {0}'.format(deg_type))
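# Minimal entry-point sketch (assumed; the original script's __main__ guard
# is not shown in the source):
if __name__ == '__main__':
    run()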
def run():
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    game_name = 'flappybird'
    print('Running game {0}'.format(game_name))
    if game_name == 'flappybird':
        mimic_env_config_path = "../environment_settings/" \
                                "flappybird_config.yaml"
    elif game_name == 'Assault-v0':
        mimic_env_config_path = "../environment_settings/" \
                                "assault_v0_config.yaml"
    elif game_name == 'Breakout-v0':
        mimic_env_config_path = "../environment_settings/" \
                                "breakout_v0_config.yaml"
    elif game_name == 'SpaceInvaders-v0':
        mimic_env_config_path = "../environment_settings/" \
                                "space_invaders_v0_config.yaml"
    else:
        raise ValueError("Unknown game name {0}".format(game_name))
    mimic_config = DRLMimicConfig.load(mimic_env_config_path)

    local_test_flag = True
    if local_test_flag:
        mimic_config.DRL.Learn.data_save_path = '../example_data/flappybird/'
        mimic_config.DRL.Learn.ckpt_dir = '../data_generator/saved_models/'
        global_model_data_path = ''
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError(
            "Unknown running setting, please set up your own environment")

    data_generator = DRLDataGenerator(
        game_name=game_name,
        config=mimic_config,
        global_model_data_path=global_model_data_path,
        local_test_flag=local_test_flag)
    data_generator.test_model_and_generate_data()
def run():
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    # Command-line options override the defaults below.
    if opts.GAME_NAME is not None:
        game_name = opts.GAME_NAME
    else:
        game_name = 'flappybird'
        opts.ACTION_ID = 0
        # config_game_name = 'assault_v0'
    if opts.METHOD_NAME is not None:
        method = opts.METHOD_NAME
    else:
        method = 'mcts'
    if opts.PLAY is not None:
        play = int(opts.PLAY)
    else:
        play = None
    if opts.C_PUCT is not None:
        c_puct = float(opts.C_PUCT)
    else:
        c_puct = None
    disentangler_name = opts.De_Name

    if game_name == 'flappybird':
        model_name = '{0}-1000000'.format(disentangler_name)
        config_game_name = 'flappybird'
    elif game_name == 'SpaceInvaders-v0':
        model_name = '{0}-1000000'.format(disentangler_name)
        config_game_name = "space_invaders_v0"
    elif game_name == 'Assault-v0':
        model_name = '{0}-1000000'.format(disentangler_name)
        config_game_name = 'assault_v0'
    elif game_name == 'Breakout-v0':
        model_name = '{0}-1000000'.format(disentangler_name)
        config_game_name = 'breakout_v0'
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    local_test_flag = False
    if local_test_flag:
        mimic_config_path = "../environment_settings/{0}_config.yaml".format(config_game_name)
        mimic_config = DRLMimicConfig.load(mimic_config_path)
        mimic_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        mimic_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        mimic_config_path = "../environment_settings/{0}_config.yaml".format(config_game_name)
        mimic_config = DRLMimicConfig.load(mimic_config_path)
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        mimic_config_path = "/home/functor/scratch/Galen/project-DRL-Interpreter/statistical-DRL-interpreter/" \
                            "environment_settings/{0}_config.yaml".format(config_game_name)
        mimic_config = DRLMimicConfig.load(mimic_config_path)
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError(
            "Unknown running setting, please set up your own environment")
    print('global path is : {0}'.format(global_model_data_path))

    # Append to an existing log file, otherwise create a new one.
    if opts.LOG_DIR is not None:
        if os.path.exists(opts.LOG_DIR):
            log_file = open(opts.LOG_DIR, 'a')
        else:
            log_file = open(opts.LOG_DIR, 'w')
    else:
        log_file = None

    try:
        print("\nRunning for game {0} with {1}".format(game_name, method), file=log_file)
        mimic_learner = MimicLearner(
            game_name=game_name,
            method=method,
            config=mimic_config,
            deg_model_name=model_name,
            local_test_flag=local_test_flag,
            global_model_data_path=global_model_data_path,
            log_file=log_file)
        # mimic_learner.test_mimic_model(action_id=int(opts.ACTION_ID), log_file=log_file)
        shell_round_number = int(opts.ROUND_NUMBER) if opts.ROUND_NUMBER is not None else None
        mimic_learner.train_mimic_model(
            action_id=int(opts.ACTION_ID),
            shell_round_number=shell_round_number,
            log_file=log_file,
            launch_time=opts.LAUNCH_TIME,
            disentangler_name=disentangler_name,
            data_type='latent',
            run_mcts=True,
            c_puct=c_puct,
            play=play,
        )
        if log_file is not None:
            log_file.close()
    except Exception as e:
        traceback.print_exc(file=log_file)
        if log_file is not None:
            log_file.write(str(e))
            log_file.flush()
            log_file.close()
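# The `opts` object used above is assumed to come from a command-line parser
# defined elsewhere in the repository. A sketch of how it might be built with
# argparse; only the attribute names are taken from the code above, while the
# flag spellings and defaults are assumptions:
import argparse

_parser = argparse.ArgumentParser()
_parser.add_argument('--GAME_NAME', dest='GAME_NAME', default=None)
_parser.add_argument('--METHOD_NAME', dest='METHOD_NAME', default=None)
_parser.add_argument('--ACTION_ID', dest='ACTION_ID', default=0)
_parser.add_argument('--PLAY', dest='PLAY', default=None)
_parser.add_argument('--C_PUCT', dest='C_PUCT', default=None)
_parser.add_argument('--De_Name', dest='De_Name', default='CVAE')
_parser.add_argument('--LOG_DIR', dest='LOG_DIR', default=None)
_parser.add_argument('--ROUND_NUMBER', dest='ROUND_NUMBER', default=None)
_parser.add_argument('--LAUNCH_TIME', dest='LAUNCH_TIME', default=None)
opts = _parser.parse_args()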
if game_name == 'flappybird':
    model_name = 'FVAE-1000000'
    config_game_name = 'flappybird'
elif game_name == 'SpaceInvaders-v0':
    model_name = 'FVAE-1000000'
    config_game_name = "space_invaders_v0"
elif game_name == 'Assault-v0':
    model_name = 'FVAE-1000000'
    config_game_name = 'assault_v0'
elif game_name == 'Breakout-v0':
    model_name = 'FVAE-1000000'
    config_game_name = 'breakout_v0'
else:
    raise ValueError("Unknown game name {0}".format(game_name))

mimic_config_path = "./environment_settings/{0}_config.yaml".format(config_game_name)
mimic_config = DRLMimicConfig.load(mimic_config_path)

# Load a trained FVAE checkpoint before extracting latent training data.
dientangler = Disentanglement(mimic_config, 'FVAE', False, global_model_data_path)
dientangler.load_checkpoint(ckptname=model_name, testing_flag=True, log_file=None)

for aid in [0]:
    data_save_dir = '/Local-Scratch/oschulte/Galen/DRL-interpreter-model/data'
    training_data_action = data_loader(episode_number=4,
                                       action_id=aid,
                                       data_save_dir=data_save_dir,
                                       dientangler=dientangler,
                                       image_type=image_type,
def run_generate_values():
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    game_name = 'SpaceInvaders-v0'
    method = 'mcts'
    disentangler_name = 'CVAE'
    action_id = 4

    if game_name == 'Assault-v0':
        # action_ids = [2, 3, 4]
        # {0: 118, 1: 165, 2: 1076, 3: 1293, 4: 1246, 5: 50, 6: 52}
        model_name = '{0}-1000000'.format(disentangler_name)
        config_path = "../environment_settings/assault_v0_config.yaml"
    elif game_name == 'SpaceInvaders-v0':
        model_name = '{0}-1000000'.format(disentangler_name)
        config_path = "../environment_settings/space_invaders_v0_config.yaml"
    elif game_name == 'flappybird':
        # action_ids = [0, 1]
        model_name = '{0}-1000000'.format(disentangler_name)
        config_path = "../environment_settings/flappybird_config.yaml"
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    # Per-game hyper-parameters for each mimic-learning method.
    if method == 'mcts':
        img_type = 'latent'
        options_dict = {
            'flappybird': ['max_node', None, 'cpuct', 0.1, 'play', 200],
            'SpaceInvaders-v0': ['max_node', None, 'cpuct', 0.1, 'play', 200],
        }
    elif method == 'cart-fvae':
        img_type = 'latent'
        options_dict = {
            'flappybird': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 20],
            'SpaceInvaders-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 3],
        }
    # elif method == 'cart':
    #     img_type = 'raw'
    #     options_dict = {
    #         'flappybird': ['max_leaf_nodes', None, 'criterion', 'mae', 'random', 'min_samples_leaf', 1],
    #     }
    else:
        raise ValueError("unknown model name {0}".format(method))
    options = options_dict[game_name]
    option_str = '-'.join([str(option) for option in options])

    # One CSV per phase: training curves and testing curves.
    training_results_saving_dir = '../results/plot_results/{0}/training-{4}-{0}-action{1}' \
                                  '-by-splits-results-{2}-{3}.txt'.format(game_name, action_id,
                                                                          method, option_str,
                                                                          disentangler_name)
    training_results_writer = open(training_results_saving_dir, 'w')
    train_results_csv_writer = csv.writer(training_results_writer)
    testing_results_saving_dir = '../results/plot_results/{0}/testing-{4}-{0}-action{1}' \
                                 '-by-splits-results-{2}-{3}.txt'.format(game_name, action_id,
                                                                         method, option_str,
                                                                         disentangler_name)
    testing_results_writer = open(testing_results_saving_dir, 'w')
    test_results_csv_writer = csv.writer(testing_results_writer)

    local_test_flag = False
    if local_test_flag:
        mimic_config = DRLMimicConfig.load(config_path)
        mimic_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        mimic_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError("Unknown running setting, please set up your own environment")
    print('global path is : {0}'.format(global_model_data_path))

    if opts.LOG_DIR is not None:
        if os.path.exists(opts.LOG_DIR):
            log_file = open(opts.LOG_DIR, 'a')
        else:
            log_file = open(opts.LOG_DIR, 'w')
    else:
        log_file = None

    print("\nRunning for game {0} with {1}".format(game_name, method), file=log_file)
    mimic_learner = MimicLearner(game_name=game_name,
                                 method=method,
                                 config=mimic_config,
                                 deg_model_name=model_name,
                                 local_test_flag=local_test_flag,
                                 global_model_data_path=global_model_data_path,
                                 log_file=log_file,
                                 options=options)

    # for action_id in [1]:
    mimic_learner.iteration_number = 0
    train_results_csv_writer.writerow(['return_value_log', 'return_value_log_struct',
                                       'return_value_var_reduction',
                                       'return_value_var_reduction_by_leaf',
                                       'mae', 'rmse', 'leaves'])
    if method == 'mcts':
        # mimic_learner.data_loader(episode_number=4, target="latent", action_id=action_id)
        mimic_learner.static_data_loader(action_id, log_file, img_type, training_flag=True)
        mimic_learner.mimic_env.assign_data(mimic_learner.memory)
        saved_nodes_dir = mimic_learner.get_MCTS_nodes_dir(action_id, disentangler_name)
        return_value_log_all, return_value_log_struct_all, return_value_var_reduction_all, \
            return_value_var_reduction_by_leaf_all, mae_all, rmse_all, leaves_number_all = \
            mimic_learner.predict_mcts_by_splits(action_id, saved_nodes_dir)
    elif method == 'cart-fvae' or method == 'cart':
        mimic_learner.static_data_loader(action_id, log_file, img_type, training_flag=True)
        # target = "latent" if method == 'cart-fvae' else "raw"
        # mimic_learner.data_loader(episode_number=4, target=target, action_id=action_id)
        mimic_learner.mimic_env.assign_data(mimic_learner.memory)
        return_value_log_all = []
        return_value_log_struct_all = []
        return_value_var_reduction_all = []
        return_value_var_reduction_by_leaf_all = []  # mirrors the testing branch below
        mae_all = []
        rmse_all = []
        leaves_number_all = []
        init_state, init_var_list = mimic_learner.mimic_env.initial_state(action=action_id)
        training_data = [[], []]
        for data_index in init_state[0]:
            data_input = np.concatenate([mimic_learner.memory[data_index][0]], axis=0)
            data_output = mimic_learner.memory[data_index][4]
            training_data[0].append(data_input)
            training_data[1].append(data_output)
        # Sweep the tree size from 2 to 300 leaves and record training metrics.
        for i in range(2, 301):
            save_model_dir = mimic_learner.global_model_data_path + '/DRL-interpreter-model/comparison' \
                             '/cart/{0}/{1}-aid{2}-node{3}' \
                             '-sklearn.model'.format(mimic_learner.game_name,
                                                     mimic_learner.method, action_id, i)
            mimic_learner.mimic_model.max_leaf_nodes = i
            return_value_log, return_value_log_struct, \
                return_value_var_reduction, mae, rmse, leaves_number = \
                mimic_learner.mimic_model.train_mimic(training_data=training_data,
                                                      save_model_dir=save_model_dir,
                                                      mimic_env=mimic_learner.mimic_env,
                                                      log_file=log_file)
            return_value_log_all.append(return_value_log)
            return_value_log_struct_all.append(return_value_log_struct)
            return_value_var_reduction_all.append(return_value_var_reduction)
            return_value_var_reduction_by_leaf_all.append(float(return_value_var_reduction) / i)
            mae_all.append(mae)
            rmse_all.append(rmse)
            leaves_number_all.append(leaves_number)
    else:
        raise ValueError("Unknown method {0}".format(method))

    j = 1 if method == 'mcts' else 0  # skip some redundant results
    for i in range(j, len(return_value_log_all)):
        train_results_csv_writer.writerow([round(return_value_log_all[i], 4),
                                           round(return_value_log_struct_all[i], 4),
                                           round(return_value_var_reduction_all[i], 8),
                                           round(return_value_var_reduction_by_leaf_all[i], 10),
                                           round(mae_all[i], 4),
                                           round(rmse_all[i], 4),
                                           leaves_number_all[i]])

    mimic_learner.iteration_number = int(mimic_learner.episodic_sample_number * 45)
    test_results_csv_writer.writerow(['return_value_log', 'return_value_log_struct',
                                      'return_value_var_reduction',
                                      'return_value_var_reduction_by_leaf',
                                      'mae', 'rmse', 'leaves'])
    return_value_log_record = []
    return_value_log_struct_record = []
    return_value_var_reduction_record = []
    return_value_var_reduction_by_leaf_record = []
    mae_record = []
    rmse_record = []
    # Evaluate on five held-out test splits and aggregate the metrics.
    for test_id in range(5):
        if method == 'mcts':
            mimic_learner.static_data_loader(action_id, log_file, img_type,
                                             training_flag=False, test_id=test_id)
            # mimic_learner.data_loader(episode_number=45.5, target="latent", action_id=action_id)
            mimic_learner.mimic_env.assign_data(mimic_learner.memory)
            saved_nodes_dir = mimic_learner.get_MCTS_nodes_dir(action_id, disentangler_name)
            return_value_log_all, return_value_log_struct_all, return_value_var_reduction_all, \
                return_value_var_reduction_by_leaf_all, mae_all, rmse_all, leaves_number_all = \
                mimic_learner.predict_mcts_by_splits(action_id, saved_nodes_dir)
        elif method == 'cart-fvae' or method == "cart":
            # target = "latent" if method == 'cart-fvae' else "raw"
            # mimic_learner.data_loader(episode_number=45.5, target=target, action_id=action_id)
            mimic_learner.static_data_loader(action_id, log_file, img_type,
                                             training_flag=False, test_id=test_id)
            mimic_learner.mimic_env.assign_data(mimic_learner.memory)
            return_value_log_all = []
            return_value_log_struct_all = []
            return_value_var_reduction_all = []
            return_value_var_reduction_by_leaf_all = []
            mae_all = []
            rmse_all = []
            leaves_number_all = []
            init_state, init_var_list = mimic_learner.mimic_env.initial_state(action=action_id)
            testing_data = [[], []]
            for data_index in init_state[0]:
                data_input = mimic_learner.memory[data_index][0]
                data_output = mimic_learner.memory[data_index][4]
                testing_data[0].append(data_input)
                testing_data[1].append(data_output)
            testing_data[0] = np.stack(testing_data[0], axis=0)
            for i in range(2, 301):
                save_model_dir = mimic_learner.global_model_data_path + '/DRL-interpreter-model/comparison' \
                                 '/cart/{0}/{1}-aid{2}-node{3}' \
                                 '-sklearn.model'.format(mimic_learner.game_name,
                                                         mimic_learner.method, action_id, i)
                return_value_log, return_value_log_struct, \
                    return_value_var_reduction, mae, rmse, leaves_number = \
                    mimic_learner.mimic_model.test_mimic(testing_data=testing_data,
                                                         save_model_dir=save_model_dir,
                                                         mimic_env=mimic_learner.mimic_env,
                                                         log_file=log_file)
                return_value_log_all.append(return_value_log)
                return_value_log_struct_all.append(return_value_log_struct)
                return_value_var_reduction_all.append(return_value_var_reduction)
                return_value_var_reduction_by_leaf_all.append(float(return_value_var_reduction) / i)
                mae_all.append(mae)
                rmse_all.append(rmse)
                leaves_number_all.append(leaves_number)
        else:
            raise ValueError("Unknown method {0}".format(method))
        return_value_log_record.append(return_value_log_all)
        return_value_log_struct_record.append(return_value_log_struct_all)
        return_value_var_reduction_record.append(return_value_var_reduction_all)
        return_value_var_reduction_by_leaf_record.append(return_value_var_reduction_by_leaf_all)
        mae_record.append(mae_all)
        rmse_record.append(rmse_all)

    # Mean and variance over the five test splits, per tree size.
    return_value_log_record_mean = np.mean(np.asarray(return_value_log_record), axis=0)
    return_value_log_record_var = np.var(np.asarray(return_value_log_record), axis=0)
    return_value_log_struct_record_mean = np.mean(np.asarray(return_value_log_struct_record), axis=0)
    return_value_log_struct_record_var = np.var(np.asarray(return_value_log_struct_record), axis=0)
    return_value_var_reduction_record_mean = np.mean(np.asarray(return_value_var_reduction_record), axis=0)
    return_value_var_reduction_record_var = np.var(np.asarray(return_value_var_reduction_record), axis=0)
    return_value_var_reduction_by_leaf_record_mean = np.mean(np.asarray(return_value_var_reduction_by_leaf_record), axis=0)
    return_value_var_reduction_by_leaf_record_var = np.var(np.asarray(return_value_var_reduction_by_leaf_record), axis=0)
    mae_record_mean = np.mean(np.asarray(mae_record), axis=0)
    mae_record_var = np.var(np.asarray(mae_record), axis=0)
    rmse_record_mean = np.mean(np.asarray(rmse_record), axis=0)
    rmse_record_var = np.var(np.asarray(rmse_record), axis=0)

    for i in range(j, len(return_value_log_all)):
        test_results_csv_writer.writerow([
            "{0}({1})".format(round(return_value_log_record_mean[i], 4),
                              round(return_value_log_record_var[i], 8)),
            "{0}({1})".format(round(return_value_log_struct_record_mean[i], 4),
                              round(return_value_log_struct_record_var[i], 8)),
            "{0}({1})".format(round(return_value_var_reduction_record_mean[i], 5),
                              round(return_value_var_reduction_record_var[i], 10)),
            "{0}({1})".format(round(return_value_var_reduction_by_leaf_record_mean[i], 5),
                              round(return_value_var_reduction_by_leaf_record_var[i], 10)),
            "{0}({1})".format(round(mae_record_mean[i], 4),
                              round(mae_record_var[i], 8)),
            "{0}({1})".format(round(rmse_record_mean[i], 4),
                              round(rmse_record_var[i], 8)),
            leaves_number_all[i]
        ])
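# The test-CSV loop above repeats the same "mean(variance)" cell formatting
# six times. A small helper like the one below could factor that out; the
# helper name and its use are a suggested refactoring sketch, not part of
# the original code:
def format_mean_var(mean_arr, var_arr, i, mean_digits=4, var_digits=8):
    """Render one CSV cell as 'mean(variance)' with per-metric rounding."""
    return "{0}({1})".format(round(mean_arr[i], mean_digits),
                             round(var_arr[i], var_digits))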
def run(game_name=None, disentangler_name=None, run_tmp_test=False, method=None, iter_test_num=5):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    if game_name is None:
        game_name = 'Assault-v0'
    if disentangler_name is None:
        disentangler_name = 'CVAE'
    if method is None:
        method = 'cart-fvae'

    if game_name == 'flappybird':
        action_ids = [0]
        model_name = '{0}-None'.format(disentangler_name)
        config_path = "../environment_settings/flappybird_config.yaml"
    elif game_name == 'SpaceInvaders-v0':
        action_ids = [4]
        model_name = '{0}-None'.format(disentangler_name)
        config_path = "../environment_settings/space_invaders_v0_config.yaml"
    elif game_name == 'Enduro-v0':
        action_ids = [7]  # 1: speed, 7: right, 8: left
        model_name = '{0}-None'.format(disentangler_name)
        config_path = "../environment_settings/enduro_v0_config.yaml"
    elif game_name == 'Assault-v0':
        action_ids = [4]  # 2: shot, 3: right, 4: left
        model_name = '{0}-None'.format(disentangler_name)
        config_path = '../environment_settings/assault_v0_config.yaml'
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    # Per-method options and the input representation each method consumes.
    if method == 'mcts':
        options_dict = {
            'flappybird': ['max_node', 30, 'cpuct', 0.1, 'play_number', 200],
            # 'Assault-v0': []
        }
        action_ids = [0]
        data_type = 'latent'
    elif method == 'cart':
        disentangler_name = None
        options_dict = {
            'flappybird': ['max_leaf_nodes', None, 'criterion', 'mae', 'random', 'min_samples_leaf', 2],
            'Assault-v0': ['max_leaf_nodes', None, 'criterion', 'mae', 'random', 'min_samples_leaf', 4],
            'SpaceInvaders-v0': ['max_leaf_nodes', None, 'criterion', 'mae', 'best', 'min_samples_leaf', 2],
            'Enduro-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 1],
        }
        data_type = 'binary'
        # data_type = 'raw'
    elif method == 'cart-fvae':
        options_dict = {
            'flappybird': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 15],
            'Assault-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 14],
            'SpaceInvaders-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 6],
            # 'Assault-v0': ['max_leaf_nodes', 80, 'criterion', 'mse', 'best', 'min_samples_leaf', 1],
        }
        data_type = 'latent'
    elif method == 'm5-rt':  # M5 regression tree
        disentangler_name = None
        options_dict = {
            'flappybird': ["-R", "-N", "-M", "10"],
            # 'Assault-v0': ["-R", "-N", "-M", "20"],
            'SpaceInvaders-v0': ["-R", "-N", "-M", "5"],
        }
        data_type = 'color'
        # options = ["-R"]
    elif method == 'm5-mt':  # M5 model tree
        # options = ["-M", "10"]
        disentangler_name = None
        options_dict = {
            'flappybird': ["-N", "-M", "10"],
            # 'Assault-v0': ["-N", "-M", "25"],
            'SpaceInvaders-v0': ["-N", "-M", "5"],
        }
        data_type = 'color'
    else:
        raise ValueError("unknown model name {0}".format(method))
    options = options_dict[game_name]
    option_str = '-'.join([str(option) for option in options])

    results_saving_dir = '../results/comparison_results/{0}/{0}-results-{1}-{2}-{3}.txt'.format(
        game_name, method, option_str, disentangler_name)
    results_writer = open(results_saving_dir, 'w')

    local_test_flag = False
    if local_test_flag:
        mimic_config = DRLMimicConfig.load(config_path)
        mimic_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        mimic_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError("Unknown running setting, please set up your own environment")
    print('global path is : {0}'.format(global_model_data_path))

    log_file = None
    train_record_results = {'return_value_log': [], 'return_value_log_struct': [],
                            'return_value_var_reduction': [], 'mae': [], 'rmse': [],
                            'leaves_number': [], 'results_strs': []}
    testing_record_results_all = []
    for test_id in range(iter_test_num):
        testing_record_results = {'return_value_log': [], 'return_value_log_struct': [],
                                  'return_value_var_reduction': [], 'mae': [], 'rmse': [],
                                  'leaves_number': [], 'results_strs': []}
        testing_record_results_all.append(testing_record_results)

    # try:
    print("\nRunning for game {0} with {1}".format(game_name, method), file=log_file)
    mimic_learner = MimicLearner(game_name=game_name,
                                 method=method,
                                 config=mimic_config,
                                 deg_model_name=model_name,
                                 local_test_flag=local_test_flag,
                                 global_model_data_path=global_model_data_path,
                                 log_file=log_file,
                                 options=options)
    for action_id in action_ids:
        # for action_id in [1]:
        mimic_learner.iteration_number = 0
        [return_value_log, return_value_log_struct, return_value_var_reduction,
         mae, rmse, leaves_number, results_str] = mimic_learner.train_mimic_model(
            action_id=action_id,
            shell_round_number=None,
            log_file=log_file,
            launch_time=None,
            data_type=data_type,
            run_mcts=False,
            disentangler_name=disentangler_name,
            run_tmp_test=run_tmp_test)
        train_record_results['return_value_log'].append(return_value_log)
        train_record_results['return_value_log_struct'].append(return_value_log_struct)
        train_record_results['return_value_var_reduction'].append(return_value_var_reduction)
        train_record_results['mae'].append(mae)
        train_record_results['rmse'].append(rmse)
        train_record_results['leaves_number'].append(leaves_number)
        train_record_results['results_strs'].append(results_str)

        for test_id in range(iter_test_num):
            [return_value_log, return_value_log_struct, return_value_var_reduction,
             mae, rmse, leaves_number, results_str] = mimic_learner.test_mimic_model(
                action_id=action_id,
                log_file=log_file,
                data_type=data_type,
                disentangler_name=disentangler_name,
                run_tmp_test=run_tmp_test,
                test_id=test_id)
            testing_record_results_all[test_id]['return_value_log'].append(return_value_log)
            testing_record_results_all[test_id]['return_value_log_struct'].append(return_value_log_struct)
            testing_record_results_all[test_id]['return_value_var_reduction'].append(return_value_var_reduction)
            testing_record_results_all[test_id]['mae'].append(mae)
            testing_record_results_all[test_id]['rmse'].append(rmse)
            testing_record_results_all[test_id]['leaves_number'].append(leaves_number)
            testing_record_results_all[test_id]['results_strs'].append(results_str)
    # except Exception as e:
    #     traceback.print_exc(file=log_file)
    #     results_writer.close()
    #     if log_file is not None:
    #         log_file.write(str(e))
    #         log_file.flush()
    #         log_file.close()
    #     # sys.stderr.write('finish shell round {0}'.format(shell_round_number))

    for results_str in train_record_results['results_strs']:
        results_writer.write(results_str + '\n')
    mean_train_return_value_log = np.mean(train_record_results['return_value_log'])
    mean_train_return_value_log_struct = np.mean(train_record_results['return_value_log_struct'])
    mean_train_return_value_var_reduction = np.mean(train_record_results['return_value_var_reduction'])
    mean_train_mae = np.mean(train_record_results['mae'])
    mean_train_rmse = np.mean(train_record_results['rmse'])
    # Each average is also reported normalized by the final leaf count.
    results_str = "Training method {0}: Avg.return_value_log:{1}, " \
                  "Avg.return_value_log_struct:{2}, Avg.return_value_var_reduction:{3}," \
                  "Avg.mae:{4}, Avg.rmse:{5}, Avg.leaves:{6}\n\n".format(
        method,
        str(mean_train_return_value_log) + "({0})".format(float(mean_train_return_value_log) / leaves_number),
        str(mean_train_return_value_log_struct) + "({0})".format(float(mean_train_return_value_log_struct) / leaves_number),
        str(mean_train_return_value_var_reduction) + "({0})".format(float(mean_train_return_value_var_reduction) / leaves_number),
        str(mean_train_mae) + "({0})".format(float(mean_train_mae) / leaves_number),
        str(mean_train_rmse) + "({0})".format(float(mean_train_rmse) / leaves_number),
        np.mean(train_record_results['leaves_number']))
    results_writer.write(results_str)

    test_return_value_log_all = []
    test_return_value_log_struct_all = []
    test_return_value_var_reduction_all = []
    test_return_value_var_reduction_per_leaf_all = []
    test_mae_all = []
    test_rmse_all = []
    test_leaf_num_all = []
    for test_id in range(iter_test_num):
        testing_record_results = testing_record_results_all[test_id]
        for results_str in testing_record_results['results_strs']:
            results_writer.write(results_str + 'iter{0}\n'.format(test_id))
        mean_test_return_value_log = np.mean(testing_record_results['return_value_log'])
        mean_test_return_value_log_struct = np.mean(testing_record_results['return_value_log_struct'])
        mean_test_return_value_var_reduction = np.mean(testing_record_results['return_value_var_reduction'])
        mean_test_mae = np.mean(testing_record_results['mae'])
        mean_test_rmse = np.mean(testing_record_results['rmse'])
        results_str = "Testing method {0} iter{7}: Avg.return_value_log:{1}, " \
                      "Avg.return_value_log_struct:{2}, Avg.return_value_var_reduction:{3}," \
                      "Avg.mae:{4}, Avg.rmse:{5}, Avg.leaves:{6}\n\n".format(
            method,
            str(mean_test_return_value_log) + "({0})".format(float(mean_test_return_value_log) / leaves_number),
            str(mean_test_return_value_log_struct) + "({0})".format(float(mean_test_return_value_log_struct) / leaves_number),
            str(mean_test_return_value_var_reduction) + "({0})".format(float(mean_test_return_value_var_reduction) / leaves_number),
            str(mean_test_mae) + "({0})".format(float(mean_test_mae) / leaves_number),
            str(mean_test_rmse) + "({0})".format(float(mean_test_rmse) / leaves_number),
            np.mean(testing_record_results['leaves_number']),
            test_id)
        results_writer.write(results_str)
        test_return_value_log_all.append(mean_test_return_value_log)
        test_return_value_log_struct_all.append(mean_test_return_value_log_struct)
        test_return_value_var_reduction_all.append(mean_test_return_value_var_reduction)
        test_return_value_var_reduction_per_leaf_all.append(mean_test_return_value_var_reduction / leaves_number)
        test_mae_all.append(mean_test_mae)
        test_rmse_all.append(mean_test_rmse)
        test_leaf_num_all.append(leaves_number)

    # Summarize across test iterations as mean(variance).
    results_str = "Testing method {0}: Mean.var_reduction:{1}," \
                  "Mean.var_reduction_per_leaf: {2}, " \
                  "Mean.mae:{3}, Mean.rmse:{4}, Mean.leaves:{5}\n\n".format(
        method,
        "{0}({1})".format(np.mean(test_return_value_var_reduction_all),
                          np.var(test_return_value_var_reduction_all)),
        "{0}({1})".format(np.mean(test_return_value_var_reduction_per_leaf_all),
                          np.var(test_return_value_var_reduction_per_leaf_all)),
        "{0}({1})".format(np.mean(test_mae_all), np.var(test_mae_all)),
        "{0}({1})".format(np.mean(test_rmse_all), np.var(test_rmse_all)),
        "{0}({1})".format(np.mean(test_leaf_num_all), np.var(test_leaf_num_all)))
    results_writer.write(results_str)
    print(results_str, file=log_file)
    results_writer.close()
    if 'mcts' not in method:
        # Release resources held by the mimic model.
        mimic_learner.mimic_model.__del__()
    if log_file is not None:
        log_file.close()
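# The 'cart'/'cart-fvae' option lists above (max_leaf_nodes, criterion,
# splitter, min_samples_leaf) match scikit-learn's DecisionTreeRegressor
# hyper-parameters. A minimal sketch of the mapping; the unpacking order is
# an assumption based on the list layout, and the 'mse'/'mae' criterion
# spellings follow the older scikit-learn (<1.0) API used in this code:
from sklearn.tree import DecisionTreeRegressor

def build_cart_from_options(options):
    # options layout: ['max_leaf_nodes', value, 'criterion', value,
    #                  splitter, 'min_samples_leaf', value]
    return DecisionTreeRegressor(max_leaf_nodes=options[1],
                                 criterion=options[3],
                                 splitter=options[4],
                                 min_samples_leaf=options[6])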
def run_static_data_generation(model_number=None, game_name=None, disentangler_type=None,
                               image_type=None,
                               global_model_data_path="/Local-Scratch/oschulte/Galen",
                               run_tmp_test=False, test_run=5):
    # game_name = 'flappybird'
    # image_type = 'latent'
    # disentangler_type = 'CVAE'
    # global_model_data_path = "/Local-Scratch/oschulte/Galen"
    if game_name is None:
        game_name = "Assault-v0"
    if image_type is None:
        image_type = 'binary'
    if run_tmp_test:
        tmp_msg = 'tmp_'
    else:
        tmp_msg = ''

    if game_name == 'flappybird':
        model_name = '{0}-{1}'.format(disentangler_type, model_number)
        config_game_name = 'flappybird'
        aids = [0]
    elif game_name == 'SpaceInvaders-v0':
        model_name = '{0}-{1}'.format(disentangler_type, model_number)
        config_game_name = "space_invaders_v0"
        aids = [4]
    elif game_name == 'Enduro-v0':
        model_name = '{0}-{1}'.format(disentangler_type, model_number)
        config_game_name = "enduro_v0"
        aids = [7]
    elif game_name == 'Assault-v0':
        model_name = '{0}-{1}'.format(disentangler_type, model_number)
        config_game_name = 'assault_v0'
        aids = [4]  # 2: shot, 3: right, 4: left
    elif game_name == 'Breakout-v0':
        model_name = '{0}-{1}'.format(disentangler_type, model_number)
        config_game_name = 'breakout_v0'
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    mimic_config_path = "../environment_settings/{0}_config.yaml".format(config_game_name)
    mimic_config = DRLMimicConfig.load(mimic_config_path)

    # Latent features require a trained disentangler; raw/binary/color images do not.
    if image_type != 'latent':
        data_loader = return_data(mimic_config.DEG.Learn, global_model_data_path,
                                  mimic_config.DRL.Learn.actions, image_type=image_type)
        disentangler = None
    else:
        data_loader = return_data(mimic_config.DEG.Learn, global_model_data_path,
                                  mimic_config.DRL.Learn.actions, image_type='origin')
        disentangler = Disentanglement(mimic_config, disentangler_type, False,
                                       global_model_data_path)
        disentangler.load_checkpoint(ckptname=model_name, testing_flag=True, log_file=None)

    for aid in aids:
        data_save_dir = '/Local-Scratch/oschulte/Galen/DRL-interpreter-model/data'
        training_data_action = data_builder(episode_number=4,
                                            action_id=aid,
                                            data_save_dir=data_save_dir,
                                            dientangler=disentangler,
                                            image_type=image_type,
                                            game_name=game_name,
                                            iteration_number=0,
                                            disentangler_type=disentangler_type,
                                            data_loader=data_loader,
                                            action_number=mimic_config.DRL.Learn.actions)
        impact_file_name_training = '{5}impact_training_{4}_data_{1}_action_{2}.csv'.format(
            image_type, game_name, aid, disentangler_type, image_type, tmp_msg)
        impact_file_Writer_training = open('../LMUT_data/' + impact_file_name_training, 'w')
        print('Writing training csv for action {}...'.format(aid))
        write_header(impact_file_Writer_training, image_type=image_type)
        write_data_text(training_data_action, impact_file_Writer_training)
        impact_file_Writer_training.close()

        iteration_number = 1000 * 45
        testing_data_action = data_builder(episode_number=46,
                                           action_id=aid,
                                           data_save_dir=data_save_dir,
                                           dientangler=disentangler,
                                           image_type=image_type,
                                           game_name=game_name,
                                           iteration_number=iteration_number,
                                           disentangler_type=disentangler_type,
                                           data_loader=data_loader,
                                           action_number=mimic_config.DRL.Learn.actions)
        for i in range(test_run):
            # Slice a 500-sample window per test iteration.
            testing_data_action_iter = testing_data_action[i * 100:(i + 5) * 100]
            # testing_data_action_iter = random.sample(testing_data_action, 500)
            # for j in range(int(len(testing_data_action) / test_run)):
            #     testing_data_action_iter.append(testing_data_action[iter_test])
            #     iter_test += 1
            # create training and testing files
            impact_file_name_testing = '{5}impact_testing_{4}_data_{1}_action_{2}_iter{6}.csv'.format(
                image_type, game_name, aid, disentangler_type, image_type, tmp_msg, i)
            impact_file_Writer_testing = open('../LMUT_data/' + impact_file_name_testing, 'w')
            print('Writing testing csv for action {0} in iter {1}...'.format(aid, i))
            write_header(impact_file_Writer_testing, image_type=image_type)
            write_data_text(testing_data_action_iter, impact_file_Writer_testing)
            impact_file_Writer_testing.close()
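# Example entry-point sketch (assumed; the original __main__ guard is not
# shown), using the function's own defaults for Assault with binary images:
if __name__ == '__main__':
    run_static_data_generation(game_name='Assault-v0', image_type='binary')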