示例#1
0
def run():
    """Train a disentanglement model (CVAE / VAE / FVAE / AAE) on one game.

    Loads the per-game YAML configuration, resolves the machine-specific
    model/data root, then dispatches to the trainer matching ``deg_type``.
    """
    # Pin CUDA device enumeration to PCI bus order and expose only GPU 1.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    game_name = 'Assault-v0'
    deg_type = 'CVAE'
    # model_number = 810000

    # One settings file per supported environment.
    config_path_by_game = {
        'Assault-v0': "../environment_settings/assault_v0_config.yaml",
        'Breakout-v0': "../environment_settings/breakout_v0_config.yaml",
        'SpaceInvaders-v0': "../environment_settings/space_invaders_v0_config.yaml",
        'flappybird': "../environment_settings/flappybird_config.yaml",
        'icehockey': '../environment_settings/icehockey_config.yaml',
        'Enduro-v0': '../environment_settings/enduro_v0_config.yaml',
        # 'Enduro-v1': '../environment_settings/enduro_v1_config.yaml',
    }
    if game_name not in config_path_by_game:
        raise ValueError("Unknown game name {0}".format(game_name))
    config_path = config_path_by_game[game_name]

    print("Running environment {0}".format(game_name))

    deg_config = DRLMimicConfig.load(config_path)
    local_test_flag = False
    if local_test_flag:
        # Local smoke-test: read bundled example data and shrink the sample size.
        deg_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        deg_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError("Unknown running setting, please set up your own environment")

    disentangler = Disentanglement(config=deg_config, deg_type=deg_type,
                                   global_model_data_path=global_model_data_path)

    # Dispatch to the trainer matching the requested model type.
    if deg_type == 'CVAE':
        disentangler.train_cvae()
    elif deg_type == 'VAE':
        disentangler.train_fvae(apply_tc=False)
    elif deg_type == 'FVAE':
        disentangler.train_fvae(apply_tc=True)
    elif deg_type == 'AAE':
        disentangler.train_aae()
    else:
        raise ValueError('Unknown deg type {0}'.format(deg_type))
示例#2
0
def run():
    """Run a pretrained DRL agent on one game and dump mimic-learning data.

    Resolves the game's configuration file and the machine-specific data
    root, then delegates to ``DRLDataGenerator.test_model_and_generate_data``.
    """
    # Pin CUDA device enumeration to PCI bus order and expose only GPU 1.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    game_name = 'flappybird'
    print('Running game {0}'.format(game_name))

    # One settings file per supported environment.
    config_path_by_game = {
        'flappybird': "../environment_settings/flappybird_config.yaml",
        'Assault-v0': "../environment_settings/assault_v0_config.yaml",
        'Breakout-v0': "../environment_settings/breakout_v0_config.yaml",
        'SpaceInvaders-v0': "../environment_settings/space_invaders_v0_config.yaml",
    }
    if game_name not in config_path_by_game:
        raise ValueError("Unknown game name {0}".format(game_name))
    mimic_env_config_path = config_path_by_game[game_name]

    mimic_config = DRLMimicConfig.load(mimic_env_config_path)

    local_test_flag = True
    if local_test_flag:
        # Local run: read example data and checkpoints from the repository tree.
        mimic_config.DRL.Learn.data_save_path = '../example_data/flappybird/'
        mimic_config.DRL.Learn.ckpt_dir = '../data_generator/saved_models/'
        global_model_data_path = ''
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError(
            "Unknown running setting, please set up your own environment")

    generator = DRLDataGenerator(game_name=game_name,
                                 config=mimic_config,
                                 global_model_data_path=global_model_data_path,
                                 local_test_flag=local_test_flag)
    generator.test_model_and_generate_data()
def run():
    """Train a mimic model for a DRL agent, driven by command-line options.

    Reads run parameters from the module-level ``opts`` object (game name,
    method, play count, c_puct, disentangler name, action id, log path,
    shell round number) and delegates the actual training to
    ``MimicLearner.train_mimic_model``. Any exception is written to the log
    file (or stderr when no log file is configured).
    """
    # Pin CUDA device enumeration to PCI bus order and expose only GPU 1.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    # Command-line options win; otherwise fall back to hard-coded defaults.
    if opts.GAME_NAME is not None:
        game_name = opts.GAME_NAME
    else:
        game_name = 'flappybird'
        opts.ACTION_ID = 0
        # config_game_name = 'assault_v0'

    method = opts.METHOD_NAME if opts.METHOD_NAME is not None else 'mcts'
    play = int(opts.PLAY) if opts.PLAY is not None else None
    c_puct = float(opts.C_PUCT) if opts.C_PUCT is not None else None

    disentangler_name = opts.De_Name

    # Every game shares the same '<disentangler>-1000000' checkpoint naming
    # scheme (the per-branch duplication was redundant); only the config
    # file's base name differs per game.
    config_game_name_by_game = {
        'flappybird': 'flappybird',
        'SpaceInvaders-v0': "space_invaders_v0",
        'Assault-v0': 'assault_v0',
        'Breakout-v0': 'breakout_v0',
    }
    if game_name not in config_game_name_by_game:
        raise ValueError("Unknown game name {0}".format(game_name))
    config_game_name = config_game_name_by_game[game_name]
    model_name = '{0}-1000000'.format(disentangler_name)

    local_test_flag = False
    if local_test_flag:
        # Local smoke-test: read bundled example data and shrink the sample size.
        mimic_config_path = "../environment_settings/{0}_config.yaml".format(
            config_game_name)
        mimic_config = DRLMimicConfig.load(mimic_config_path)
        mimic_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        mimic_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        mimic_config_path = "../environment_settings/{0}_config.yaml".format(
            config_game_name)
        mimic_config = DRLMimicConfig.load(mimic_config_path)
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        mimic_config_path = "/home/functor/scratch/Galen/project-DRL-Interpreter/statistical-DRL-interpreter/" \
                                 "environment_settings/{0}_config.yaml".format(config_game_name)
        mimic_config = DRLMimicConfig.load(mimic_config_path)
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError(
            "Unknown running setting, please set up your own environment")

    print('global path is : {0}'.format(global_model_data_path))
    # Mode 'a' creates the file when missing and appends otherwise, so the
    # previous exists()-then-choose-mode check was redundant and race-prone.
    log_file = open(opts.LOG_DIR, 'a') if opts.LOG_DIR is not None else None

    try:
        print("\nRunning for game {0} with {1}".format(game_name, method),
              file=log_file)
        mimic_learner = MimicLearner(
            game_name=game_name,
            method=method,
            config=mimic_config,
            deg_model_name=model_name,
            local_test_flag=local_test_flag,
            global_model_data_path=global_model_data_path,
            log_file=log_file)
        # mimic_learner.test_mimic_model(action_id= int(opts.ACTION_ID), log_file=log_file)
        shell_round_number = int(
            opts.ROUND_NUMBER) if opts.ROUND_NUMBER is not None else None

        mimic_learner.train_mimic_model(
            action_id=int(opts.ACTION_ID),
            shell_round_number=shell_round_number,
            log_file=log_file,
            launch_time=opts.LAUNCH_TIME,
            disentangler_name=disentangler_name,
            data_type='latent',
            run_mcts=True,
            c_puct=c_puct,
            play=play,
        )

        if log_file is not None:
            log_file.close()
    except Exception as e:
        # print_exc(file=None) falls back to stderr, so this is safe without a log file.
        traceback.print_exc(file=log_file)
        if log_file is not None:
            log_file.write(str(e))
            log_file.flush()
            log_file.close()
示例#4
0
        config_game_name = 'flappybird'
    elif game_name == 'SpaceInvaders-v0':
        model_name = 'FVAE-1000000'
        config_game_name = "space_invaders_v0"
    elif game_name == 'Assault-v0':
        model_name = 'FVAE-1000000'
        config_game_name = 'assault_v0'
    elif game_name == 'Breakout-v0':
        model_name = 'FVAE-1000000'
        config_game_name = 'breakout_v0'
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    mimic_config_path = "./environment_settings/{0}_config.yaml".format(
        config_game_name)
    mimic_config = DRLMimicConfig.load(mimic_config_path)

    dientangler = Disentanglement(mimic_config, 'FVAE', False,
                                  global_model_data_path)
    dientangler.load_checkpoint(ckptname=model_name,
                                testing_flag=True,
                                log_file=None)

    for aid in [0]:
        data_save_dir = '/Local-Scratch/oschulte/Galen/DRL-interpreter-model/data'

        training_data_action = data_loader(episode_number=4,
                                           action_id=aid,
                                           data_save_dir=data_save_dir,
                                           dientangler=dientangler,
                                           image_type=image_type,
示例#5
0
def run_generate_values():
    """Evaluate a mimic model by number of splits and dump metrics to CSV.

    For the configured game / method / disentangler and one action id, this
    writes two CSV files under ``../results/plot_results/<game>/``:

    * training metrics, one row per split count;
    * testing metrics, mean(variance) aggregated over five test splits.

    Fixes over the previous version: ``return_value_var_reduction_by_leaf_all``
    is now initialized and filled on the *training* path for cart-style
    methods (it previously raised ``NameError`` when writing training rows),
    and both CSV writers are closed when their output is complete.
    """
    # Pin CUDA device enumeration to PCI bus order and expose only GPU 1.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    game_name = 'SpaceInvaders-v0'
    method = 'mcts'
    disentangler_name = 'CVAE'
    action_id = 4
    if game_name == 'Assault-v0':
        # action_ids = [2, 3, 4]  # {0: 118, 1: 165, 2: 1076, 3: 1293, 4: 1246, 5: 50, 6: 52}
        model_name = '{0}-1000000'.format(disentangler_name)
        config_path = "../environment_settings/assault_v0_config.yaml"
    elif game_name == 'SpaceInvaders-v0':
        model_name = '{0}-1000000'.format(disentangler_name)
        config_path = "../environment_settings/space_invaders_v0_config.yaml"
    elif game_name == 'flappybird':
        # action_ids = [0, 1]
        model_name = '{0}-1000000'.format(disentangler_name)
        config_path = "../environment_settings/flappybird_config.yaml"
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    # Per-method hyper-parameter options, keyed by game.
    if method == 'mcts':
        img_type = 'latent'
        options_dict = {
            'flappybird':['max_node', None, 'cpuct', 0.1, 'play', 200],
            'SpaceInvaders-v0': ['max_node', None, 'cpuct', 0.1, 'play', 200],
        }
    elif method == 'cart-fvae':
        img_type = 'latent'
        options_dict = {
            'flappybird': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 20],
            'SpaceInvaders-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 3],
        }
    # elif method == 'cart':
    #     img_type = 'raw'
    #     options_dict = {
    #         'flappybird': ['max_leaf_nodes', None, 'criterion', 'mae', 'random', 'min_samples_leaf', 1],
    #     }
    else:
        raise ValueError("unknown model name {0}".format(method))
    options = options_dict[game_name]

    option_str = '-'.join([str(option) for option in options])

    training_results_saving_dir = '../results/plot_results/{0}/training-{4}-{0}-action{1}' \
                                  '-by-splits-results-{2}-{3}.txt'.format(game_name, action_id, method, option_str, disentangler_name)
    training_results_writer = open(training_results_saving_dir, 'w')
    train_results_csv_writer = csv.writer(training_results_writer)

    testing_results_saving_dir = '../results/plot_results/{0}/testing-{4}-{0}-action{1}' \
                                 '-by-splits-results-{2}-{3}.txt'.format(game_name, action_id, method, option_str, disentangler_name)
    testing_results_writer = open(testing_results_saving_dir, 'w')
    test_results_csv_writer = csv.writer(testing_results_writer)

    local_test_flag = False
    if local_test_flag:
        # Local smoke-test: read bundled example data and shrink the sample size.
        mimic_config = DRLMimicConfig.load(config_path)
        mimic_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        mimic_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError("Unknown running setting, please set up your own environment")

    print('global path is : {0}'.format(global_model_data_path))
    # Mode 'a' creates the file when missing and appends otherwise, so no
    # exists() check is needed before opening the log file.
    log_file = open(opts.LOG_DIR, 'a') if opts.LOG_DIR is not None else None

    print("\nRunning for game {0} with {1}".format(game_name, method), file=log_file)
    mimic_learner = MimicLearner(game_name=game_name,
                                 method=method,
                                 config=mimic_config,
                                 deg_model_name=model_name,
                                 local_test_flag=local_test_flag,
                                 global_model_data_path=global_model_data_path,
                                 log_file=log_file,
                                 options=options)

    # ---- Training metrics, one row per split / leaf count. ----
    mimic_learner.iteration_number = 0
    train_results_csv_writer.writerow(['return_value_log', 'return_value_log_struct', 'return_value_var_reduction',
                                      'return_value_var_reduction_by_leaf', 'mae', 'rmse', 'leaves'])
    if method == 'mcts':
        # mimic_learner.data_loader(episode_number=4, target="latent", action_id=action_id)
        mimic_learner.static_data_loader(action_id, log_file, img_type, training_flag=True)
        mimic_learner.mimic_env.assign_data(mimic_learner.memory)
        saved_nodes_dir = mimic_learner.get_MCTS_nodes_dir(action_id, disentangler_name)
        return_value_log_all, return_value_log_struct_all, return_value_var_reduction_all, \
        return_value_var_reduction_by_leaf_all, mae_all, rmse_all, leaves_number_all = mimic_learner.predict_mcts_by_splits(action_id, saved_nodes_dir)
    elif method == 'cart-fvae' or method == 'cart':
        mimic_learner.static_data_loader(action_id, log_file, img_type, training_flag=True)
        # target = "latent" if method == 'cart-fvae' else "raw"
        # mimic_learner.data_loader(episode_number=4, target=target, action_id=action_id)
        mimic_learner.mimic_env.assign_data(mimic_learner.memory)
        return_value_log_all = []
        return_value_log_struct_all = []
        return_value_var_reduction_all = []
        # BUGFIX: this list was never initialized (nor appended to) on the
        # training path, so writing the training rows below crashed with a
        # NameError for cart-style methods. Mirror the testing path.
        return_value_var_reduction_by_leaf_all = []
        mae_all = []
        rmse_all = []
        leaves_number_all = []
        init_state, init_var_list = mimic_learner.mimic_env.initial_state(action=action_id)
        training_data = [[], []]
        for data_index in init_state[0]:
            data_input = np.concatenate([mimic_learner.memory[data_index][0]], axis=0)
            data_output = mimic_learner.memory[data_index][4]
            training_data[0].append(data_input)
            training_data[1].append(data_output)
        for i in range(2, 301):
            save_model_dir = mimic_learner.global_model_data_path + '/DRL-interpreter-model/comparison' \
                                                                    '/cart/{0}/{1}-aid{2}-node{3}' \
                                                                    '-sklearn.model'.format(mimic_learner.game_name,
                                                                                            mimic_learner.method,
                                                                                            action_id,
                                                                                            i)
            mimic_learner.mimic_model.max_leaf_nodes = i
            return_value_log, return_value_log_struct, \
            return_value_var_reduction, mae, rmse, leaves_number \
                = mimic_learner.mimic_model.train_mimic(training_data=training_data,
                                               save_model_dir=save_model_dir,
                                               mimic_env=mimic_learner.mimic_env,
                                               log_file=log_file)
            return_value_log_all.append(return_value_log)
            return_value_log_struct_all.append(return_value_log_struct)
            return_value_var_reduction_all.append(return_value_var_reduction)
            # Normalize by the leaf budget, as done on the testing path.
            return_value_var_reduction_by_leaf_all.append(float(return_value_var_reduction) / i)
            mae_all.append(mae)
            rmse_all.append(rmse)
            leaves_number_all.append(leaves_number)
    else:
        raise ValueError("Unknown method {0}".format(method))

    j = 1 if method == 'mcts' else 0 # skip some redundant results

    for i in range(j, len(return_value_log_all)):
        train_results_csv_writer.writerow([round(return_value_log_all[i], 4),
                                           round(return_value_log_struct_all[i], 4),
                                           round(return_value_var_reduction_all[i], 8),
                                           round(return_value_var_reduction_by_leaf_all[i], 10),
                                           round(mae_all[i], 4),
                                           round(rmse_all[i], 4),
                                           leaves_number_all[i]])
    # Training output is complete; release the file handle.
    training_results_writer.close()

    # ---- Testing metrics, aggregated over five test splits. ----
    mimic_learner.iteration_number = int(mimic_learner.episodic_sample_number * 45)
    test_results_csv_writer.writerow(['return_value_log', 'return_value_log_struct', 'return_value_var_reduction',
                                      'return_value_var_reduction_by_leaf', 'mae', 'rmse', 'leaves'])

    return_value_log_record  = []
    return_value_log_struct_record = []
    return_value_var_reduction_record = []
    return_value_var_reduction_by_leaf_record = []
    mae_record = []
    rmse_record = []
    for test_id in range(5):
        if method == 'mcts':
            mimic_learner.static_data_loader(action_id, log_file, img_type, training_flag=False, test_id=test_id)
            # mimic_learner.data_loader(episode_number=45.5, target="latent", action_id=action_id)
            mimic_learner.mimic_env.assign_data(mimic_learner.memory)
            saved_nodes_dir = mimic_learner.get_MCTS_nodes_dir(action_id, disentangler_name)
            return_value_log_all, return_value_log_struct_all, return_value_var_reduction_all, \
            return_value_var_reduction_by_leaf_all, mae_all, rmse_all, leaves_number_all = mimic_learner.predict_mcts_by_splits(action_id, saved_nodes_dir)
        elif method == 'cart-fvae' or method == "cart":
            # target = "latent" if method == 'cart-fvae' else "raw"
            # mimic_learner.data_loader(episode_number=45.5, target=target, action_id=action_id)
            mimic_learner.static_data_loader(action_id, log_file, img_type, training_flag=False, test_id=test_id)
            mimic_learner.mimic_env.assign_data(mimic_learner.memory)
            return_value_log_all = []
            return_value_log_struct_all = []
            return_value_var_reduction_all = []
            return_value_var_reduction_by_leaf_all = []
            mae_all = []
            rmse_all = []
            leaves_number_all = []
            init_state, init_var_list = mimic_learner.mimic_env.initial_state(action=action_id)
            testing_data = [[], []]
            for data_index in init_state[0]:
                data_input = mimic_learner.memory[data_index][0]
                data_output = mimic_learner.memory[data_index][4]
                testing_data[0].append(data_input)
                testing_data[1].append(data_output)
            testing_data[0] = np.stack(testing_data[0], axis=0)
            for i in range(2, 301):
                save_model_dir = mimic_learner.global_model_data_path + '/DRL-interpreter-model/comparison' \
                                                                        '/cart/{0}/{1}-aid{2}-node{3}' \
                                                                        '-sklearn.model'.format(mimic_learner.game_name,
                                                                                                mimic_learner.method,
                                                                                                action_id,
                                                                                                i)
                return_value_log, return_value_log_struct, \
                return_value_var_reduction, mae, rmse, leaves_number \
                    = mimic_learner.mimic_model.test_mimic(testing_data=testing_data,
                                                           save_model_dir=save_model_dir,
                                                           mimic_env=mimic_learner.mimic_env,
                                                           log_file=log_file)
                return_value_log_all.append(return_value_log)
                return_value_log_struct_all.append(return_value_log_struct)
                return_value_var_reduction_all.append(return_value_var_reduction)
                return_value_var_reduction_by_leaf_all.append(float(return_value_var_reduction)/i)
                mae_all.append(mae)
                rmse_all.append(rmse)
                leaves_number_all.append(leaves_number)
        else:
            raise ValueError("Unknown method {0}".format(method))

        return_value_log_record.append(return_value_log_all)
        return_value_log_struct_record.append(return_value_log_struct_all)
        return_value_var_reduction_record.append(return_value_var_reduction_all)
        return_value_var_reduction_by_leaf_record.append(return_value_var_reduction_by_leaf_all)
        mae_record.append(mae_all)
        rmse_record.append(rmse_all)

    # Mean and variance across the five test splits, per split count.
    return_value_log_record_mean = np.mean(np.asarray(return_value_log_record), axis=0)
    return_value_log_record_var = np.var(np.asarray(return_value_log_record), axis=0)

    return_value_log_struct_record_mean = np.mean(np.asarray(return_value_log_struct_record), axis=0)
    return_value_log_struct_record_var = np.var(np.asarray(return_value_log_struct_record), axis=0)

    return_value_var_reduction_record_mean = np.mean(np.asarray(return_value_var_reduction_record), axis=0)
    return_value_var_reduction_record_var = np.var(np.asarray(return_value_var_reduction_record), axis=0)

    return_value_var_reduction_by_leaf_record_mean = np.mean(np.asarray(return_value_var_reduction_by_leaf_record), axis=0)
    return_value_var_reduction_by_leaf_record_var = np.var(np.asarray(return_value_var_reduction_by_leaf_record), axis=0)

    mae_record_mean = np.mean(np.asarray(mae_record), axis=0)
    mae_record_var = np.var(np.asarray(mae_record), axis=0)

    rmse_record_mean = np.mean(np.asarray(rmse_record), axis=0)
    rmse_record_var = np.var(np.asarray(rmse_record), axis=0)

    for i in range(j, len(return_value_log_all)):
        test_results_csv_writer.writerow(["{0}({1})".format(round(return_value_log_record_mean[i], 4),
                                                            round(return_value_log_record_var[i], 8)),
                                          "{0}({1})".format(round(return_value_log_struct_record_mean[i], 4),
                                                            round(return_value_log_struct_record_var[i], 8)),
                                          "{0}({1})".format(round(return_value_var_reduction_record_mean[i], 5),
                                                            round(return_value_var_reduction_record_var[i], 10)),
                                          "{0}({1})".format(round(return_value_var_reduction_by_leaf_record_mean[i], 5),
                                                            round(return_value_var_reduction_by_leaf_record_var[i], 10)),
                                          "{0}({1})".format(round(mae_record_mean[i], 4),
                                                            round(mae_record_var[i], 8)),
                                          "{0}({1})".format(round(rmse_record_mean[i], 4),
                                                            round(rmse_record_var[i], 8)),
                                          leaves_number_all[i] ])
    # Testing output is complete; release the remaining file handles.
    testing_results_writer.close()
    if log_file is not None:
        log_file.close()
示例#6
0
def run(game_name=None, disentangler_name=None, run_tmp_test=False, method = None, iter_test_num=5):
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"

    if game_name is None:
        game_name = 'Assault-v0'

    if disentangler_name is None:
        disentangler_name = 'CVAE'

    if method is None:
        method = 'cart-fvae'

    if game_name == 'flappybird':
        action_ids = [0]
        model_name = '{0}-None'.format(disentangler_name)
        config_path = "../environment_settings/flappybird_config.yaml"
    elif game_name == 'SpaceInvaders-v0':
        action_ids = [4]
        model_name = '{0}-None'.format(disentangler_name)
        config_path = "../environment_settings/space_invaders_v0_config.yaml"
    elif game_name == 'Enduro-v0':
        action_ids = [7]  # 1: speed, 7 right, 8 left
        model_name = '{0}-None'.format(disentangler_name)
        config_path = "../environment_settings/enduro_v0_config.yaml"
    elif game_name == 'Assault-v0':
        action_ids = [4]  # 2: shot, 3 right, 4 left
        model_name = '{0}-None'.format(disentangler_name)
        config_path = '../environment_settings/assault_v0_config.yaml'
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    if method == 'mcts':
        options_dict = {
            'flappybird':['max_node', 30, 'cpuct', 0.1, 'play_number', 200],
            # 'Assault-v0':[]
        }
        action_ids = [0]
        data_type = 'latent'
    elif method == 'cart':
        disentangler_name = None
        options_dict = {
            'flappybird': ['max_leaf_nodes', None, 'criterion', 'mae', 'random', 'min_samples_leaf', 2],
            'Assault-v0': ['max_leaf_nodes', None, 'criterion', 'mae', 'random', 'min_samples_leaf', 4],
            'SpaceInvaders-v0': ['max_leaf_nodes', None, 'criterion', 'mae', 'best', 'min_samples_leaf',2],
            'Enduro-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 1],
        }
        data_type = 'binary'
        # data_type = 'raw'
    elif method == 'cart-fvae':
        options_dict = {
            'flappybird': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 15],
            'Assault-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 14],
            'SpaceInvaders-v0': ['max_leaf_nodes', None, 'criterion', 'mse', 'best', 'min_samples_leaf', 6],
            # 'Assault-v0': ['max_leaf_nodes', 80, 'criterion', 'mse', 'best', 'min_samples_leaf', 1],
        }
        data_type = 'latent'
    elif method == 'm5-rt':  # m5 regression tree
        disentangler_name = None
        options_dict = {
            'flappybird': ["-R", "-N", "-M", "10"],
            # 'Assault-v0': ["-R", "-N", "-M", "20"],
            'SpaceInvaders-v0': ["-R", "-N", "-M", "5"],
        }
        data_type = 'color'
        # options = ["-R"]
    elif method == 'm5-mt':  # m5 model tree
        # options = ["-M", "10"]
        disentangler_name = None
        options_dict = {
            'flappybird':["-N", "-M", "10"],
            # 'Assault-v0':["-N", "-M", "25"],
            'SpaceInvaders-v0': ["-N", "-M", "5"],
        }
        data_type = 'color'
    else:
        raise ValueError("unknown model name {0}".format(method))
    options = options_dict[game_name]

    option_str = '-'.join([str(option) for option in options])
    results_saving_dir = '../results/comparison_results/{0}/{0}-results-{1}-{2}-{3}.txt'.format(game_name,
                                                                                                method,
                                                                                                option_str,
                                                                                                disentangler_name)
    results_writer = open(results_saving_dir, 'w')

    local_test_flag = False
    if local_test_flag:
        mimic_config = DRLMimicConfig.load(config_path)
        mimic_config.DEG.FVAE.dset_dir = '../example_data'
        global_model_data_path = ''
        mimic_config.Mimic.Learn.episodic_sample_number = 49
    elif os.path.exists("/Local-Scratch/oschulte/Galen"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/Local-Scratch/oschulte/Galen"
    elif os.path.exists("/home/functor/scratch/Galen/project-DRL-Interpreter"):
        mimic_config = DRLMimicConfig.load(config_path)
        global_model_data_path = "/home/functor/scratch/Galen/project-DRL-Interpreter"
    else:
        raise EnvironmentError("Unknown running setting, please set up your own environment")

    print('global path is : {0}'.format(global_model_data_path))
    log_file = None


    train_record_results = {'return_value_log':[], 'return_value_log_struct':[], 'return_value_var_reduction':[],
                            'mae':[], 'rmse':[], 'leaves_number':[], 'results_strs':[]}

    testing_record_results_all = []
    for test_id in range(iter_test_num):
        testing_record_results = {'return_value_log':[], 'return_value_log_struct':[], 'return_value_var_reduction':[],
                                  'mae': [], 'rmse': [], 'leaves_number': [], 'results_strs':[]}
        testing_record_results_all.append(testing_record_results)

    # try:
    print("\nRunning for game {0} with {1}".format(game_name, method), file=log_file)
    mimic_learner = MimicLearner(game_name=game_name,
                                 method=method,
                                 config=mimic_config,
                                 deg_model_name=model_name,
                                 local_test_flag=local_test_flag,
                                 global_model_data_path=global_model_data_path,
                                 log_file=log_file,
                                 options=options)

    for action_id in action_ids:
    # for action_id in [1]:
        mimic_learner.iteration_number = 0
        [return_value_log, return_value_log_struct,
         return_value_var_reduction, mae, rmse,
         leaves_number, results_str] = mimic_learner.train_mimic_model(action_id=action_id,
                                                                       shell_round_number=None,
                                                                       log_file=log_file,
                                                                       launch_time=None,
                                                                       data_type=data_type,
                                                                       run_mcts=False,
                                                                       disentangler_name=disentangler_name,
                                                                       run_tmp_test=run_tmp_test
                                                                       )
        train_record_results['return_value_log'].append(return_value_log)
        train_record_results['return_value_log_struct'].append(return_value_log_struct)
        train_record_results['return_value_var_reduction'].append(return_value_var_reduction)
        train_record_results['mae'].append(mae)
        train_record_results['rmse'].append(rmse)
        train_record_results['leaves_number'].append(leaves_number)
        train_record_results['results_strs'].append(results_str)


        for test_id in range(iter_test_num):
            [return_value_log, return_value_log_struct,
             return_value_var_reduction, mae, rmse,
             leaves_number, results_str] = mimic_learner.test_mimic_model(action_id= action_id,
                                                                          log_file=log_file,
                                                                          data_type=data_type,
                                                                          disentangler_name=disentangler_name,
                                                                          run_tmp_test=run_tmp_test,
                                                                          test_id=test_id)
            testing_record_results_all[test_id]['return_value_log'].append(return_value_log)
            testing_record_results_all[test_id]['return_value_log_struct'].append(return_value_log_struct)
            testing_record_results_all[test_id]['return_value_var_reduction'].append(return_value_var_reduction)
            testing_record_results_all[test_id]['mae'].append(mae)
            testing_record_results_all[test_id]['rmse'].append(rmse)
            testing_record_results_all[test_id]['leaves_number'].append(leaves_number)
            testing_record_results_all[test_id]['results_strs'].append(results_str)
    # except Exception as e:
    #     traceback.print_exc(file=log_file)
    #     results_writer.close()
    #     if log_file is not None:
    #         log_file.write(str(e))
    #         log_file.flush()
    #         log_file.close()
    #         # sys.stderr.write('finish shell round {0}'.format(shell_round_number))

    for results_str in train_record_results['results_strs']:
        results_writer.write(results_str+'\n')

    mean_train_return_value_log= np.mean(train_record_results['return_value_log'])
    mean_train_return_value_log_struct = np.mean(train_record_results['return_value_log_struct'])
    mean_train_return_value_var_reduction= np.mean(train_record_results['return_value_var_reduction'])
    mean_train_mae = np.mean(train_record_results['mae'])
    mean_train_rmse = np.mean(train_record_results['rmse'])
    results_str = "Training method {0}: Avg.return_value_log:{1}, " \
                  "Avg.return_value_log_struct:{2}, Avg.return_value_var_reduction:{3}," \
                  "Avg.mae:{4}, Avg.rmse:{5}, Avg.leaves:{6}\n\n".format(method,
                                                                         str(mean_train_return_value_log)+ "({0})".format(
                                                                             float(mean_train_return_value_log) / leaves_number),
                                                                         str(mean_train_return_value_log_struct) + "({0})".format(
                                                                             float(mean_train_return_value_log_struct) / leaves_number),
                                                                         str(mean_train_return_value_var_reduction) + "({0})".format(
                                                                             float(mean_train_return_value_var_reduction) / leaves_number),
                                                                         str(mean_train_mae) + "({0})".format(
                                                                             float(mean_train_mae) / leaves_number),
                                                                         str(mean_train_rmse) + "({0})".format(
                                                                             float(mean_train_rmse) / leaves_number),
                                                                         np.mean(train_record_results['leaves_number']))
    results_writer.write(results_str)

    test_return_value_log_all = []
    test_return_value_log_struct_all = []
    test_return_value_var_reduction_all = []
    test_return_value_var_reduction_per_leaf_all = []
    test_mae_all = []
    test_rmse_all = []
    test_leaf_num_all = []
    for test_id in range(iter_test_num):
        testing_record_results = testing_record_results_all[test_id]
        for results_str in testing_record_results['results_strs']:
            results_writer.write(results_str+'iter{0}\n'.format(test_id))

        mean_test_return_value_log= np.mean(testing_record_results['return_value_log'])
        mean_test_return_value_log_struct = np.mean(testing_record_results['return_value_log_struct'])
        mean_test_return_value_var_reduction= np.mean(testing_record_results['return_value_var_reduction'])
        mean_test_mae = np.mean(testing_record_results['mae'])
        mean_test_rmse = np.mean(testing_record_results['rmse'])
        results_str = "Testing method {0} iter{7}: Avg.return_value_log:{1}, " \
                      "Avg.return_value_log_struct:{2}, Avg.return_value_var_reduction:{3}," \
                      "Avg.mae:{4}, Avg.rmse:{5}, Avg.leaves:{6}\n\n".format(method,
                                                                             str(mean_test_return_value_log)+ "({0})".format(
                                                                                 float(mean_test_return_value_log) / leaves_number),
                                                                             str(mean_test_return_value_log_struct) + "({0})".format(
                                                                                 float(mean_test_return_value_log_struct) / leaves_number),
                                                                             str(mean_test_return_value_var_reduction) + "({0})".format(
                                                                                 float(mean_test_return_value_var_reduction) / leaves_number),
                                                                             str(mean_test_mae) + "({0})".format(
                                                                                 float(mean_test_mae) / leaves_number),
                                                                             str(mean_test_rmse) + "({0})".format(
                                                                                 float(mean_test_rmse) / leaves_number),
                                                                             np.mean(testing_record_results['leaves_number']),
                                                                             test_id)
        results_writer.write(results_str)
        test_return_value_log_all.append(mean_test_return_value_log)
        test_return_value_log_struct_all.append(mean_test_return_value_log_struct)
        test_return_value_var_reduction_all.append(mean_test_return_value_var_reduction)
        test_return_value_var_reduction_per_leaf_all.append(mean_test_return_value_var_reduction/ leaves_number)
        test_mae_all.append(mean_test_mae)
        test_rmse_all.append(mean_test_rmse)
        test_leaf_num_all.append(leaves_number)


    results_str = "Testing method {0}: Mean.var_reduction:{1}," \
                  "Mean.var_reduction_per_leaf: {2}, " \
                  "Mean.mae:{3}, Mean.rmse:{4}, Mean.leaves:{5}\n\n".format(method,
                                                                          "{0}({1})".format(np.mean(test_return_value_var_reduction_all),
                                                                                            np.var(test_return_value_var_reduction_all)),
                                                                          "{0}({1})".format(np.mean(test_return_value_var_reduction_per_leaf_all),
                                                                                            np.var(test_return_value_var_reduction_per_leaf_all)),
                                                                          "{0}({1})".format(np.mean(test_mae_all),
                                                                                              np.var(test_mae_all)),
                                                                          "{0}({1})".format(np.mean(test_rmse_all),
                                                                                              np.var(test_rmse_all)),
                                                                          "{0}({1})".format(np.mean(test_leaf_num_all),
                                                                                              np.var(test_leaf_num_all)),
                                                                            )
    results_writer.write(results_str)
    print(results_str, file=log_file)

    results_writer.close()
    if 'mcts' not in method:
        mimic_learner.mimic_model.__del__()

    if log_file is not None:
        log_file.close()
# ---- Example #7 (scraped sample separator; original marker: 示例#7 / 0) ----
def run_static_data_generation(model_number=None, game_name=None,
                               disentangler_type = None,
                               image_type = None,
                               global_model_data_path = "/Local-Scratch/oschulte/Galen",
                               run_tmp_test=False, test_run=5):
    """Generate static training/testing CSV datasets for the mimic learner.

    For each configured action id of ``game_name``, builds one training split
    (first 4 episodes, iteration offset 0) and one testing split (episode 46,
    iteration offset 45*1000), then writes the testing split out as
    ``test_run`` overlapping 500-sample windows, one CSV per window.

    Args:
        model_number: checkpoint number of the disentangler (may be None).
        game_name: environment name; defaults to "Assault-v0".
        disentangler_type: e.g. 'CVAE'; only used when image_type == 'latent'.
        image_type: 'binary' (default), 'origin', 'latent', ...
        global_model_data_path: root directory holding model/data files.
        run_tmp_test: if True, prefix output files with 'tmp_'.
        test_run: number of testing CSV files to emit per action.

    Raises:
        ValueError: for an unknown or not-yet-configured game name.
    """
    if game_name is None:
        game_name = "Assault-v0"
    if image_type is None:
        image_type = 'binary'
    # NOTE(review): the original also had no-op branches that reset
    # disentangler_type / model_number back to None; removed as dead code.

    tmp_msg = 'tmp_' if run_tmp_test else ''

    # The checkpoint name follows the same pattern for every game.
    model_name = '{0}-{1}'.format(disentangler_type, model_number)
    if game_name == 'flappybird':
        config_game_name = 'flappybird'
        aids = [0]
    elif game_name == 'SpaceInvaders-v0':
        config_game_name = "space_invaders_v0"
        aids = [4]
    elif game_name == 'Enduro-v0':
        config_game_name = "enduro_v0"
        aids = [7]
    elif game_name == 'Assault-v0':
        config_game_name = 'assault_v0'
        aids = [4]  # 2: shot, 3 right, 4 left
    elif game_name == 'Breakout-v0':
        # The original branch never set `aids`, which crashed later with a
        # NameError at the per-action loop; fail fast with a clear message.
        raise ValueError("Action ids for Breakout-v0 are not configured yet")
    else:
        raise ValueError("Unknown game name {0}".format(game_name))

    mimic_config_path = "../environment_settings/{0}_config.yaml".format(config_game_name)
    mimic_config = DRLMimicConfig.load(mimic_config_path)

    if image_type != 'latent':
        # Raw/binary images: no disentangler needed.
        data_loader = return_data(mimic_config.DEG.Learn,
                                  global_model_data_path,
                                  mimic_config.DRL.Learn.actions,
                                  image_type=image_type)
        disentangler = None
    else:
        # Latent features: load original images plus a trained disentangler.
        data_loader = return_data(mimic_config.DEG.Learn,
                                  global_model_data_path,
                                  mimic_config.DRL.Learn.actions,
                                  image_type='origin')
        disentangler = Disentanglement(mimic_config, disentangler_type, False, global_model_data_path)
        disentangler.load_checkpoint(ckptname=model_name, testing_flag=True, log_file=None)

    for aid in aids:
        data_save_dir = '/Local-Scratch/oschulte/Galen/DRL-interpreter-model/data'

        # Training split: first episodes, iteration offset 0.
        # ("dientangler" [sic] is the callee's parameter name.)
        training_data_action = data_builder(episode_number=4, action_id=aid,
                                            data_save_dir=data_save_dir,
                                            dientangler=disentangler,
                                            image_type=image_type,
                                            game_name=game_name,
                                            iteration_number=0,
                                            disentangler_type=disentangler_type,
                                            data_loader=data_loader,
                                            action_number=mimic_config.DRL.Learn.actions)
        # Same filename as the original's format string, spelled out plainly
        # (the original passed image_type twice and never used args {0}/{3}).
        impact_file_name_training = '{0}impact_training_{1}_data_{2}_action_{3}.csv'.format(
            tmp_msg, image_type, game_name, aid)

        print('Writing training csv for action {}...'.format(aid))
        # `with` guarantees the file is closed even if a write fails.
        with open('../LMUT_data/' + impact_file_name_training, 'w') as impact_file_writer_training:
            write_header(impact_file_writer_training, image_type=image_type)
            write_data_text(training_data_action, impact_file_writer_training)

        # Testing split: skip 45 episodes worth of iterations (1000 per episode).
        iteration_number = 1000 * 45
        testing_data_action = data_builder(episode_number=46, action_id=aid,
                                           data_save_dir=data_save_dir,
                                           dientangler=disentangler,
                                           image_type=image_type,
                                           game_name=game_name,
                                           iteration_number=iteration_number,
                                           disentangler_type=disentangler_type,
                                           data_loader=data_loader,
                                           action_number=mimic_config.DRL.Learn.actions)

        for i in range(test_run):
            # Overlapping 500-sample windows, shifted by 100 per iteration
            # (preserved from the original; the overlap appears intentional).
            testing_data_action_iter = testing_data_action[i * 100:(i + 5) * 100]
            impact_file_name_testing = '{0}impact_testing_{1}_data_{2}_action_{3}_iter{4}.csv'.format(
                tmp_msg, image_type, game_name, aid, i)

            print('Writing testing csv for action {0} in iter {1}...'.format(aid, i))
            with open('../LMUT_data/' + impact_file_name_testing, 'w') as impact_file_writer_testing:
                write_header(impact_file_writer_testing, image_type=image_type)
                write_data_text(testing_data_action_iter, impact_file_writer_testing)