def generate_causal_structures(max_delay: int = 0):
    """Build and persist the hypothesis space for the CE3D scenario.

    Instantiates a minimal, physics-free environment, derives the lever
    attribute labels from it, and delegates to ``generate_hypothesis_space``
    to write the causal-chain structure space plus the two- and
    three-solution schema spaces to their standard paths.

    :param max_delay: forwarded to ``generate_hypothesis_space``; maximum
        causal delay considered when enumerating structures.
    """
    setup_params = {
        "use_physics": False,
        "train_scenario_name": "CE3D",
        "src_dir": None,
    }
    env = Agent.pre_instantiation_setup(setup_params, bypass_confirmation=True)
    env.lever_index_mode = "position"
    # Resolve the human-readable label of every attribute, in canonical order.
    attribute_labels = [env.attribute_labels[attr] for attr in env.attribute_order]
    (
        chain_space_path,
        two_solution_path,
        three_solution_path,
    ) = setup_structure_space_paths()
    generate_hypothesis_space(
        env=env,
        structure=CAUSAL_CHAIN_EDGES,
        causal_chain_structure_space_path=chain_space_path,
        two_solution_schemas_structure_space_path=two_solution_path,
        three_solution_schemas_structure_space_path=three_solution_path,
        attributes=attribute_labels,
        actions=ACTIONS,
        fluents=FLUENTS,
        fluent_states=FLUENT_STATES,
        perceptually_causal_relations=None,
        max_delay=max_delay,
    )
def main():
    """Run the OpenLock learner over the CC3-CE4 (default) scenario.

    Parses CLI arguments, builds the parameter dict, ensures the hypothesis
    space files exist (generating them if missing), then runs
    ``num_agent_runs`` independent agents through training trials and, for
    CE4/CC4 test scenarios, a testing trial.

    Fixes vs. original: the causal structure spaces are loaded ONCE before
    the agent loop (they were reloaded from disk on every one of the 40
    runs even though the paths never change), matching the newer driver
    variant in this file.
    """
    global_start_time = time.time()
    args = parse_arguments()
    ablation_params = AblationParams()

    data_dir = (
        "~/Desktop/Mass/OpenLockLearningResults/cc3-ce4_subjects"
        if args.savedir is None
        else args.savedir
    )
    param_scenario = "CC3-CE4" if args.scenario is None else args.scenario
    # Any non-None value on the CLI means "skip the confirmation prompt".
    bypass_confirmation = args.bypass_confirmation is not None

    if args.ablations is not None:
        # Enable each requested ablation flag on AblationParams by name.
        for ablation in args.ablations:
            ablation = ablation.upper()
            if hasattr(ablation_params, ablation):
                setattr(ablation_params, ablation, True)
            else:
                raise ValueError("Unknown ablation argument: {}".format(ablation))

    params = PARAMS[param_scenario]
    params["data_dir"] = data_dir
    params["train_attempt_limit"] = 30
    params["test_attempt_limit"] = 30
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10
    params["chain_sample_size"] = 1000
    params["use_physics"] = False
    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 1
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    params["intervention_mode"] = "action"
    # setup ablations
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(
        l0=1, l1=1, l2=1, door=1
    )
    params["using_ids"] = False
    params["multiproc"] = False
    params["deterministic"] = False
    params["num_agent_runs"] = 40
    # Per-process scratch directory; hash(time.time()) is just a cheap
    # run-unique suffix.
    params["src_dir"] = "/tmp/openlocklearner/" + str(hash(time.time())) + "/src/"
    params["print_messages"] = False

    env = Agent.pre_instantiation_setup(params, bypass_confirmation)
    env.lever_index_mode = "position"

    (
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    ) = setup_structure_space_paths()
    if not os.path.exists(causal_chain_structure_space_path):
        print("WARNING: no hypothesis space files found, generating hypothesis spaces")
        generate_causal_structures()

    interventions_predefined = []
    # these are used to advance to the next trial after there have no chains
    # pruned for num_steps_with_no_pruning_to_finish_trial steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]

    # Loop-invariant: load the structure/schema spaces once, not per run.
    (
        causal_chain_structure_space,
        two_solution_schemas,
        three_solution_schemas,
    ) = load_causal_structures_from_file(
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    )

    for i in range(num_agent_runs):
        agent_start_time = time.time()
        env = Agent.make_env(params)
        env.lever_index_mode = "position"
        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            **{
                "two_solution_schemas": two_solution_schemas,
                "three_solution_schemas": three_solution_schemas,
            },
        )
        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"]
        )
        agent.training_trial_order = possible_trials

        # training
        for trial_name in possible_trials:
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        # testing: only the 4-lever scenarios have a separate test phase
        if params["test_scenario_name"] in ("CE4", "CC4"):
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["test_scenario_name"],
                action_limit=params["test_action_limit"],
                attempt_limit=params["test_attempt_limit"],
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        agent.print_agent_summary()
        print(
            "Finished agent. Total runtime: {}s".format(time.time() - agent_start_time)
        )
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")

    print(
        "Finished all agents for {}. Total runtime: {}s".format(
            param_scenario, time.time() - global_start_time
        )
    )
def replot_training_results(path):
    """Regenerate the reward plot for a previously saved agent run.

    :param path: path to the agent's saved JSON results file; the plot is
        written as ``reward_plot.png`` next to that file.
    """
    # Context manager closes the file promptly; the original
    # json.load(open(path)) leaked the handle.
    with open(path) as agent_file:
        agent_json = json.load(agent_file)
    agent_folder = os.path.dirname(path)
    # os.path.join avoids producing an absolute "/reward_plot.png" when
    # `path` has no directory component (dirname == "").
    Agent.plot_rewards(
        agent_json["rewards"],
        agent_json["epsilons"],
        os.path.join(agent_folder, "reward_plot.png"),
    )
# params["data_dir"] = os.path.dirname(ROOT_DIR) + "/OpenLockResults/subjects" params["data_dir"] = human_config_data["HUMAN_SAVE_DIR"] params["src_dir"] = "/tmp/openlocklearner/" + str(hash( time.time())) + "/src/" params["use_physics"] = True params["effect_probabilities"] = generate_effect_probabilities() # this section randomly selects a testing and training scenario # train_scenario_name, test_scenario_name = select_random_scenarios() # params['train_scenario_name'] = train_scenario_name # params['test_scenario_name'] = test_scenario_name scenario = select_scenario(params["train_scenario_name"]) # todo: this should not be part of OpenLockLearnerAgent env = Agent.pre_instantiation_setup(params) env.lever_index_mode = "role" # create session/trial/experiment manager agent = HumanAgent(params, env) atexit.register(agent.cleanup) # used for debugging, runs a specific scenario & trial # run_specific_trial_and_scenario(manager, 'CC3', 'trial5', params['train_action_limit'], params['train_attempt_limit']) for trial_num in range(0, params["train_num_trials"]): agent.run_trial_human( params["train_scenario_name"], params["train_action_limit"], params["train_attempt_limit"],
def main():
    """Run the OpenLock learner with CLI-configurable limits and logging.

    Newer driver variant: configures logging from ``args.verbosity``, takes
    attempt limits and replication count from the CLI, loads the
    structure/schema spaces once before the agent loop, and enables
    multiprocessing for 4-lever scenarios.
    """
    global_start_time = time.time()
    args = parse_arguments()
    logging.basicConfig(
        level=args.verbosity,
        format="%(asctime)s:%(filename)s:%(lineno)d:%(levelname)s %(message)s",
    )
    logging.info(args)
    ablation_params = AblationParams()
    # Fall back to a hard-coded results directory when --savedir is omitted.
    if args.savedir is None:
        data_dir = "/home/joschnei/OpenLock/agent/data/OpenLockLearningResults/cc3-ce4_subjects"
    else:
        data_dir = args.savedir
    if args.scenario is None:
        param_scenario = "CC3-CE4"
    else:
        param_scenario = args.scenario
    # Any non-None value means "skip the confirmation prompt".
    if args.bypass_confirmation is None:
        bypass_confirmation = False
    else:
        bypass_confirmation = True
    if args.ablations is None:
        pass
    else:
        # process ablations: flip the named flag on AblationParams
        for ablation in args.ablations:
            ablation = ablation.upper()
            if hasattr(ablation_params, ablation):
                setattr(ablation_params, ablation, True)
            else:
                exception_str = "Unknown ablation argument: {}".format(
                    ablation)
                raise ValueError(exception_str)
    params = PARAMS[param_scenario]
    params["data_dir"] = data_dir
    params["train_attempt_limit"] = args.train_attempt_limit
    params["test_attempt_limit"] = args.test_attempt_limit
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10  # doesn't matter
    params["chain_sample_size"] = 1000  # doesn't matter
    params["use_physics"] = False
    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 1
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    # these params were extracted using matlab
    params["intervention_mode"] = "action"
    # setup ablations
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(l0=1.0,
                                                                   l1=1.0,
                                                                   l2=1.0,
                                                                   door=1.0)
    params["using_ids"] = False
    params["multiproc"] = False
    params["deterministic"] = True  # Why would you shuffle the chains????
    params["num_agent_runs"] = args.n_replications
    params["src_dir"] = None
    params["print_messages"] = False
    params["n_cpus"] = args.n_cpus
    logging.info(params)
    logging.info("Pre-instantiation setup")
    env = Agent.pre_instantiation_setup(params, bypass_confirmation)
    env.lever_index_mode = "position"
    (
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    ) = setup_structure_space_paths()
    # Lazily generate the hypothesis space files on first run.
    if not os.path.exists(causal_chain_structure_space_path):
        logging.warning(
            "No hypothesis space files found, generating hypothesis spaces")
        generate_causal_structures(max_delay=params.get("max_delay", 0))
    interventions_predefined = []
    # these are used to advance to the next trial after there have no chains pruned for num_steps_with_no_pruning_to_finish_trial steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]
    logging.info("Loading structure and schemas")
    # Loaded once here — loop-invariant across agent runs.
    (
        causal_chain_structure_space,
        two_solution_schemas,
        three_solution_schemas,
    ) = load_causal_structures_from_file(
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    )
    logging.info("Starting trials")
    for i in range(num_agent_runs):
        logging.info(f"Starting agent run {i} of {num_agent_runs}")
        agent_start_time = time.time()
        env = Agent.make_env(params)
        env.lever_index_mode = "position"
        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            **{
                "two_solution_schemas": two_solution_schemas,
                "three_solution_schemas": three_solution_schemas,
            },
        )
        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"])
        agent.training_trial_order = possible_trials
        logging.info("Training agent")
        for trial_name in possible_trials:
            # Multiprocess only the 4-lever scenarios (name contains "4").
            agent.multiproc = "4" in params["train_scenario_name"]
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=
                chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )
        # testing
        if params["test_scenario_name"] in ("CE4", "CC4", "CE4D", "CC4D"):
            logging.info("Testing agent")
            agent.multiproc = True
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["test_scenario_name"],
                action_limit=params["test_action_limit"],
                attempt_limit=params["test_attempt_limit"],
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=
                chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )
        agent.print_agent_summary()
        logging.info(
            "Finished agent. Total runtime: {}s".format(time.time() -
                                                        agent_start_time))
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")
    logging.info("Finished all agents for {}. Total runtime: {}s".format(
        param_scenario, time.time() - global_start_time))
    return
def main():
    """Run the learner on the simplified CE3 scenario (testing driver).

    Optionally regenerates the simplified structure/schema spaces (see
    ``regenerate_structure_spaces``), then runs ``num_agent_runs`` agents
    through training and a single testing trial each.

    Fixes vs. original: the flag controlling space regeneration was named
    ``generate_causal_structures``, shadowing the module-level function of
    the same name; a dead ``num_steps_with_no_pruning_to_finish_trial = 5``
    (immediately overwritten by 500) is removed; the structure spaces are
    loaded once before the agent loop instead of on every run.
    """
    global_start_time = time.time()
    param_scenario = "CE3-CE4"
    params = PARAMS[param_scenario]
    params["data_dir"] = "~/Desktop/OpenLockLearningResultsTesting/subjects"
    params["train_scenario_name"] = "CE3_simplified"
    params["test_scenario_name"] = "CE3_simplified"
    params["train_attempt_limit"] = 10000
    params["test_attempt_limit"] = 10000
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10
    params["chain_sample_size"] = 1000
    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 2
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    params["intervention_mode"] = "action"
    # setup ablations
    ablation_params = AblationParams()
    # ablation_params.INDEXED_DISTRIBUTIONS = True
    # ablation_params.PRUNING = True
    # ablation_params.TOP_DOWN_FIRST_TRIAL = True
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(
        l0=1, l1=1, l2=1, door=1
    )
    params["using_ids"] = False
    params["multiproc"] = False
    params["use_physics"] = False
    params["deterministic"] = False
    params["num_agent_runs"] = 40
    params["src_dir"] = "/tmp/openlocklearner/" + str(hash(time.time())) + "/src/"

    np.random.seed(1234)
    env = Agent.pre_instantiation_setup(params)
    env.lever_index_mode = "position"
    attributes = [
        env.attribute_labels[attribute] for attribute in env.attribute_order
    ]
    structure = CAUSAL_CHAIN_EDGES

    # Renamed from `generate_causal_structures`, which shadowed the
    # module-level function of the same name.
    regenerate_structure_spaces = False
    causal_chain_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_causal_chain_space.pickle")
    two_solution_schemas_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_two_solution_schemas.pickle")
    three_solution_schemas_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_three_solution_schemas.pickle")

    if regenerate_structure_spaces:
        # perceptually_causal_relations = (
        #     generate_perceptually_causal_relations_simplified_testing_scenario()
        # )
        perceptually_causal_relations = None
        causal_chain_structure_space = generate_chain_structure_space(
            env=env,
            actions=ACTIONS,
            attributes=attributes,
            fluents=FLUENTS,
            fluent_states=FLUENT_STATES,
            perceptually_causal_relations=perceptually_causal_relations,
            structure=structure,
        )
        write_causal_structure_space(
            causal_chain_structure_space=causal_chain_structure_space,
            causal_chain_structure_space_path=causal_chain_structure_space_path,
        )
        t = time.time()
        two_solution_schemas = AbstractSchemaStructureSpace(
            causal_chain_structure_space.structure, 2, draw_chains=False)
        write_schema_structure_space(
            schema_structure_space=two_solution_schemas,
            schema_structure_space_path=two_solution_schemas_structure_space_path,
        )
        three_solution_schemas = AbstractSchemaStructureSpace(
            causal_chain_structure_space.structure, 3, draw_chains=False)
        write_schema_structure_space(
            schema_structure_space=three_solution_schemas,
            schema_structure_space_path=three_solution_schemas_structure_space_path,
        )
        print("Schema generation time: {}s".format(time.time() - t))
        return

    interventions_predefined = []
    # interventions_predefined = [
    #     ("push_LEFT", "pull_LEFT"),    # pushing and pulling the same lever
    #     ("push_UPPER", "pull_UPPER"),  # unlocking and locking the door
    #     ("pull_LEFT", "pull_UPPER"),   # no state change
    #     ("push_LEFT", "pull_UPPER"),   # left lever state change
    #     ("push_LEFT", "push_UPPER"),   # door unlocks at the last stage
    #     ("push_LEFT", "push_UPPER"),   # door unlocks at the last stage
    #     # ("push_UPPER", "push_door"), # only solution
    # ]
    # these are used to advance to the next trial after there have no chains
    # pruned for num_steps_with_no_pruning_to_finish_trial steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]

    # Loop-invariant: load the structure/schema spaces once, not per run.
    (
        causal_chain_structure_space,
        two_solution_schemas,
        three_solution_schemas,
    ) = load_causal_structures_from_file(
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    )

    for i in range(num_agent_runs):
        agent_start_time = time.time()
        env = Agent.make_env(params)
        env.lever_index_mode = "position"
        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            **{
                "two_solution_schemas": two_solution_schemas,
                "three_solution_schemas": three_solution_schemas,
            },
        )
        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"])
        # training
        agent.training_trial_order = possible_trials
        for trial_name in possible_trials:
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )
        # testing (unconditional: train and test scenarios are the same here)
        (
            trial_selected,
            chain_idxs_pruned_from_initial_observation,
        ) = agent.setup_trial(
            scenario_name=params["test_scenario_name"],
            action_limit=params["test_action_limit"],
            attempt_limit=params["test_attempt_limit"],
        )
        agent.run_trial_openlock_learner(
            trial_selected,
            num_steps_with_no_pruning_to_finish_trial,
            interventions_predefined=interventions_predefined,
            chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
            intervention_mode=params["intervention_mode"],
        )
        agent.print_agent_summary()
        print(
            "Finished agent. Total runtime: {}s".format(time.time() - agent_start_time)
        )
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")

    print(
        "Finished all agents for {}. Total runtime: {}s".format(
            param_scenario, time.time() - global_start_time
        )
    )
def replay_subject_data(subject_dir):
    """Replay a recorded subject's trials in the physics environment.

    Loads the saved subject data (either a model-agent record with an
    ``agent`` attribute, or an older human-subject record), reconstructs a
    physics-backed env, then steps through every recorded action sequence,
    rendering as it goes.

    :param subject_dir: directory containing the subject's saved data.
    :raises ValueError: for a trial whose scenario name is not CE3/CC3/CE4/CC4.

    Fixes vs. original: removed a leftover ``print('hi')`` debug statement
    at the end of the function.
    """
    subject_data = load_subject_data(subject_dir, use_json_pickle_for_trial=False)
    print("Replaying subject {}".format(subject_data.subject_id))
    # Replay should never be cut short by an attempt limit.
    max_attempts = 999999999
    # subject has agent, more recent model data
    if hasattr(subject_data, "agent"):
        train_scenario_name = subject_data.agent["params"]["train_scenario_name"]
        train_action_limit = subject_data.agent["params"]["train_action_limit"]
        train_attempt_limit = max_attempts
        test_action_limit = subject_data.agent["params"]["test_action_limit"]
        test_attempt_limit = max_attempts
        lever_index_mode = "position"
    else:
        train_scenario_name = subject_data.trial_seq[0]["scenario_name"]
        train_action_limit = 3
        train_attempt_limit = max_attempts
        test_action_limit = 3
        test_attempt_limit = max_attempts
        # human subjects use role
        lever_index_mode = "role"

    # minimal construction of params
    params = dict()
    params["use_physics"] = True
    params["train_scenario_name"] = train_scenario_name
    params["src_dir"] = None
    env = Agent.pre_instantiation_setup(params, bypass_confirmation=True)
    env.lever_index_mode = lever_index_mode

    # setup dummy window so we can start recording
    env.setup_trial(
        "CC3",
        action_limit=3,
        attempt_limit=30,
        multiproc=False,
    )
    env.reset()
    input("Press enter to start")

    # for each trial, setup the env and execute all of the action sequences
    for trial in subject_data.trial_seq:
        trial_scenario_name = trial["scenario_name"]
        # 3 lever trial
        if trial_scenario_name in ("CE3", "CC3"):
            action_limit = train_action_limit
            attempt_limit = train_attempt_limit
        # 4 lever trial
        elif trial_scenario_name in ("CE4", "CC4"):
            action_limit = test_action_limit
            attempt_limit = test_attempt_limit
            # corrects a bug where some 4 lever testing trials did not
            # properly save their trial name as a string
            if not isinstance(trial["name"], str):
                # we can directly reencode the bytes back into a python string
                _raw_dtype, raw_bytes = jsonpickle.decode(
                    json.dumps(trial["name"]["py/reduce"][1])
                )
                trial["name"] = decode_bad_jsonpickle_str(raw_bytes)
        else:
            raise ValueError("Unknown scenario name")

        # setup the env for the trial
        env.setup_trial(
            trial_scenario_name,
            action_limit=action_limit,
            attempt_limit=attempt_limit,
            specified_trial=trial["name"],
            multiproc=False,
        )
        # go through every attempt in the trial
        for attempt_seq in trial["attempt_seq"]:
            env.reset()
            # go through every action sequence in this attempt
            action_seq = attempt_seq["action_seq"]
            done = False
            action_num = 0
            # render in a loop
            while not done:
                # execute the next action if an action is not currently executing
                if not env.action_executing:
                    action_str = action_seq[action_num]["name"]
                    action_env = env.action_map[action_str]
                    env.step(action_env)
                    action_num += 1
                env.render(env)
                done = env.determine_attempt_finished()