def generate_causal_structures(max_delay: int = 0):
    """Build and persist the hypothesis space for the CE3D scenario.

    Instantiates a minimal, physics-free environment, derives the lever
    attribute labels from it, and delegates to ``generate_hypothesis_space``
    to write the causal-chain structure space plus the two- and
    three-solution schema spaces to their standard paths.

    :param max_delay: forwarded to ``generate_hypothesis_space``; maximum
        causal delay considered when enumerating structures.
    """
    setup_params = {
        "use_physics": False,
        "train_scenario_name": "CE3D",
        "src_dir": None,
    }
    env = Agent.pre_instantiation_setup(setup_params, bypass_confirmation=True)
    env.lever_index_mode = "position"
    # Resolve the human-readable label of every attribute, in canonical order.
    attribute_labels = [env.attribute_labels[attr] for attr in env.attribute_order]
    (
        chain_space_path,
        two_solution_path,
        three_solution_path,
    ) = setup_structure_space_paths()
    generate_hypothesis_space(
        env=env,
        structure=CAUSAL_CHAIN_EDGES,
        causal_chain_structure_space_path=chain_space_path,
        two_solution_schemas_structure_space_path=two_solution_path,
        three_solution_schemas_structure_space_path=three_solution_path,
        attributes=attribute_labels,
        actions=ACTIONS,
        fluents=FLUENTS,
        fluent_states=FLUENT_STATES,
        perceptually_causal_relations=None,
        max_delay=max_delay,
    )
def main():
    """Run the OpenLock learner over the CC3-CE4 (default) scenario.

    Parses CLI arguments, builds the parameter dict, ensures the hypothesis
    space files exist (generating them if missing), then runs
    ``num_agent_runs`` independent agents through training trials and, for
    CE4/CC4 test scenarios, a testing trial.

    Fixes vs. original: the causal structure spaces are loaded ONCE before
    the agent loop (they were reloaded from disk on every one of the 40
    runs even though the paths never change), matching the newer driver
    variant in this file.
    """
    global_start_time = time.time()
    args = parse_arguments()
    ablation_params = AblationParams()

    data_dir = (
        "~/Desktop/Mass/OpenLockLearningResults/cc3-ce4_subjects"
        if args.savedir is None
        else args.savedir
    )
    param_scenario = "CC3-CE4" if args.scenario is None else args.scenario
    # Any non-None value on the CLI means "skip the confirmation prompt".
    bypass_confirmation = args.bypass_confirmation is not None

    if args.ablations is not None:
        # Enable each requested ablation flag on AblationParams by name.
        for ablation in args.ablations:
            ablation = ablation.upper()
            if hasattr(ablation_params, ablation):
                setattr(ablation_params, ablation, True)
            else:
                raise ValueError("Unknown ablation argument: {}".format(ablation))

    params = PARAMS[param_scenario]
    params["data_dir"] = data_dir
    params["train_attempt_limit"] = 30
    params["test_attempt_limit"] = 30
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10
    params["chain_sample_size"] = 1000
    params["use_physics"] = False
    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 1
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    params["intervention_mode"] = "action"
    # setup ablations
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(
        l0=1, l1=1, l2=1, door=1
    )
    params["using_ids"] = False
    params["multiproc"] = False
    params["deterministic"] = False
    params["num_agent_runs"] = 40
    # Per-process scratch directory; hash(time.time()) is just a cheap
    # run-unique suffix.
    params["src_dir"] = "/tmp/openlocklearner/" + str(hash(time.time())) + "/src/"
    params["print_messages"] = False

    env = Agent.pre_instantiation_setup(params, bypass_confirmation)
    env.lever_index_mode = "position"

    (
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    ) = setup_structure_space_paths()
    if not os.path.exists(causal_chain_structure_space_path):
        print("WARNING: no hypothesis space files found, generating hypothesis spaces")
        generate_causal_structures()

    interventions_predefined = []
    # these are used to advance to the next trial after there have no chains
    # pruned for num_steps_with_no_pruning_to_finish_trial steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]

    # Loop-invariant: load the structure/schema spaces once, not per run.
    (
        causal_chain_structure_space,
        two_solution_schemas,
        three_solution_schemas,
    ) = load_causal_structures_from_file(
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    )

    for i in range(num_agent_runs):
        agent_start_time = time.time()
        env = Agent.make_env(params)
        env.lever_index_mode = "position"
        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            **{
                "two_solution_schemas": two_solution_schemas,
                "three_solution_schemas": three_solution_schemas,
            },
        )
        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"]
        )
        agent.training_trial_order = possible_trials

        # training
        for trial_name in possible_trials:
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        # testing: only the 4-lever scenarios have a separate test phase
        if params["test_scenario_name"] in ("CE4", "CC4"):
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["test_scenario_name"],
                action_limit=params["test_action_limit"],
                attempt_limit=params["test_attempt_limit"],
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )

        agent.print_agent_summary()
        print(
            "Finished agent. Total runtime: {}s".format(time.time() - agent_start_time)
        )
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")

    print(
        "Finished all agents for {}. Total runtime: {}s".format(
            param_scenario, time.time() - global_start_time
        )
    )
def replot_training_results(path):
    """Regenerate the reward plot for a previously saved agent run.

    :param path: path to the agent's saved JSON results file; the plot is
        written as ``reward_plot.png`` next to that file.
    """
    # Context manager closes the file promptly; the original
    # json.load(open(path)) leaked the handle.
    with open(path) as agent_file:
        agent_json = json.load(agent_file)
    agent_folder = os.path.dirname(path)
    # os.path.join avoids producing an absolute "/reward_plot.png" when
    # `path` has no directory component (dirname == "").
    Agent.plot_rewards(
        agent_json["rewards"],
        agent_json["epsilons"],
        os.path.join(agent_folder, "reward_plot.png"),
    )
# params["data_dir"] = os.path.dirname(ROOT_DIR) + "/OpenLockResults/subjects" params["data_dir"] = human_config_data["HUMAN_SAVE_DIR"] params["src_dir"] = "/tmp/openlocklearner/" + str(hash( time.time())) + "/src/" params["use_physics"] = True params["effect_probabilities"] = generate_effect_probabilities() # this section randomly selects a testing and training scenario # train_scenario_name, test_scenario_name = select_random_scenarios() # params['train_scenario_name'] = train_scenario_name # params['test_scenario_name'] = test_scenario_name scenario = select_scenario(params["train_scenario_name"]) # todo: this should not be part of OpenLockLearnerAgent env = Agent.pre_instantiation_setup(params) env.lever_index_mode = "role" # create session/trial/experiment manager agent = HumanAgent(params, env) atexit.register(agent.cleanup) # used for debugging, runs a specific scenario & trial # run_specific_trial_and_scenario(manager, 'CC3', 'trial5', params['train_action_limit'], params['train_attempt_limit']) for trial_num in range(0, params["train_num_trials"]): agent.run_trial_human( params["train_scenario_name"], params["train_action_limit"], params["train_attempt_limit"],
def main():
    """Run the OpenLock learner with CLI-configurable limits and logging.

    Newer driver variant: configures logging from ``args.verbosity``, takes
    attempt limits and replication count from the CLI, loads the
    structure/schema spaces once before the agent loop, and enables
    multiprocessing for 4-lever scenarios.
    """
    global_start_time = time.time()
    args = parse_arguments()
    logging.basicConfig(
        level=args.verbosity,
        format="%(asctime)s:%(filename)s:%(lineno)d:%(levelname)s %(message)s",
    )
    logging.info(args)
    ablation_params = AblationParams()
    # Fall back to a hard-coded results directory when --savedir is omitted.
    if args.savedir is None:
        data_dir = "/home/joschnei/OpenLock/agent/data/OpenLockLearningResults/cc3-ce4_subjects"
    else:
        data_dir = args.savedir
    if args.scenario is None:
        param_scenario = "CC3-CE4"
    else:
        param_scenario = args.scenario
    # Any non-None value means "skip the confirmation prompt".
    if args.bypass_confirmation is None:
        bypass_confirmation = False
    else:
        bypass_confirmation = True
    if args.ablations is None:
        pass
    else:
        # process ablations: flip the named flag on AblationParams
        for ablation in args.ablations:
            ablation = ablation.upper()
            if hasattr(ablation_params, ablation):
                setattr(ablation_params, ablation, True)
            else:
                exception_str = "Unknown ablation argument: {}".format(
                    ablation)
                raise ValueError(exception_str)
    params = PARAMS[param_scenario]
    params["data_dir"] = data_dir
    params["train_attempt_limit"] = args.train_attempt_limit
    params["test_attempt_limit"] = args.test_attempt_limit
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10  # doesn't matter
    params["chain_sample_size"] = 1000  # doesn't matter
    params["use_physics"] = False
    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 1
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    # these params were extracted using matlab
    params["intervention_mode"] = "action"
    # setup ablations
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(l0=1.0,
                                                                   l1=1.0,
                                                                   l2=1.0,
                                                                   door=1.0)
    params["using_ids"] = False
    params["multiproc"] = False
    params["deterministic"] = True  # Why would you shuffle the chains????
    params["num_agent_runs"] = args.n_replications
    params["src_dir"] = None
    params["print_messages"] = False
    params["n_cpus"] = args.n_cpus
    logging.info(params)
    logging.info("Pre-instantiation setup")
    env = Agent.pre_instantiation_setup(params, bypass_confirmation)
    env.lever_index_mode = "position"
    (
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    ) = setup_structure_space_paths()
    # Lazily generate the hypothesis space files on first run.
    if not os.path.exists(causal_chain_structure_space_path):
        logging.warning(
            "No hypothesis space files found, generating hypothesis spaces")
        generate_causal_structures(max_delay=params.get("max_delay", 0))
    interventions_predefined = []
    # these are used to advance to the next trial after there have no chains pruned for num_steps_with_no_pruning_to_finish_trial steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]
    logging.info("Loading structure and schemas")
    # Loaded once here — loop-invariant across agent runs.
    (
        causal_chain_structure_space,
        two_solution_schemas,
        three_solution_schemas,
    ) = load_causal_structures_from_file(
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    )
    logging.info("Starting trials")
    for i in range(num_agent_runs):
        logging.info(f"Starting agent run {i} of {num_agent_runs}")
        agent_start_time = time.time()
        env = Agent.make_env(params)
        env.lever_index_mode = "position"
        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            **{
                "two_solution_schemas": two_solution_schemas,
                "three_solution_schemas": three_solution_schemas,
            },
        )
        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"])
        agent.training_trial_order = possible_trials
        logging.info("Training agent")
        for trial_name in possible_trials:
            # Multiprocess only the 4-lever scenarios (name contains "4").
            agent.multiproc = "4" in params["train_scenario_name"]
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=
                chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )
        # testing
        if params["test_scenario_name"] in ("CE4", "CC4", "CE4D", "CC4D"):
            logging.info("Testing agent")
            agent.multiproc = True
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["test_scenario_name"],
                action_limit=params["test_action_limit"],
                attempt_limit=params["test_attempt_limit"],
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=
                chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )
        agent.print_agent_summary()
        logging.info(
            "Finished agent. Total runtime: {}s".format(time.time() -
                                                        agent_start_time))
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")
    logging.info("Finished all agents for {}. Total runtime: {}s".format(
        param_scenario, time.time() - global_start_time))
    return
def main():
    """Run the learner on the simplified CE3 scenario (testing driver).

    Optionally regenerates the simplified structure/schema spaces (see
    ``regenerate_structure_spaces``), then runs ``num_agent_runs`` agents
    through training and a single testing trial each.

    Fixes vs. original: the flag controlling space regeneration was named
    ``generate_causal_structures``, shadowing the module-level function of
    the same name; a dead ``num_steps_with_no_pruning_to_finish_trial = 5``
    (immediately overwritten by 500) is removed; the structure spaces are
    loaded once before the agent loop instead of on every run.
    """
    global_start_time = time.time()
    param_scenario = "CE3-CE4"
    params = PARAMS[param_scenario]
    params["data_dir"] = "~/Desktop/OpenLockLearningResultsTesting/subjects"
    params["train_scenario_name"] = "CE3_simplified"
    params["test_scenario_name"] = "CE3_simplified"
    params["train_attempt_limit"] = 10000
    params["test_attempt_limit"] = 10000
    # run to the full attempt limit, regardless of whether or not all solutions were found
    params["full_attempt_limit"] = False
    params["intervention_sample_size"] = 10
    params["chain_sample_size"] = 1000
    # openlock learner params
    params["lambda_multiplier"] = 1
    params["local_alpha_update"] = 2
    params["global_alpha_update"] = 1
    params["epsilon"] = 0.99
    params["epsilon_decay"] = 0.99
    params["epsilon_active"] = False
    params["intervention_mode"] = "action"
    # setup ablations
    ablation_params = AblationParams()
    # ablation_params.INDEXED_DISTRIBUTIONS = True
    # ablation_params.PRUNING = True
    # ablation_params.TOP_DOWN_FIRST_TRIAL = True
    params["ablation_params"] = ablation_params
    params["effect_probabilities"] = generate_effect_probabilities(
        l0=1, l1=1, l2=1, door=1
    )
    params["using_ids"] = False
    params["multiproc"] = False
    params["use_physics"] = False
    params["deterministic"] = False
    params["num_agent_runs"] = 40
    params["src_dir"] = "/tmp/openlocklearner/" + str(hash(time.time())) + "/src/"

    np.random.seed(1234)
    env = Agent.pre_instantiation_setup(params)
    env.lever_index_mode = "position"
    attributes = [
        env.attribute_labels[attribute] for attribute in env.attribute_order
    ]
    structure = CAUSAL_CHAIN_EDGES

    # Renamed from `generate_causal_structures`, which shadowed the
    # module-level function of the same name.
    regenerate_structure_spaces = False
    causal_chain_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_causal_chain_space.pickle")
    two_solution_schemas_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_two_solution_schemas.pickle")
    three_solution_schemas_structure_space_path = os.path.expanduser(
        "~/Desktop/simplified_three_solution_schemas.pickle")

    if regenerate_structure_spaces:
        # perceptually_causal_relations = (
        #     generate_perceptually_causal_relations_simplified_testing_scenario()
        # )
        perceptually_causal_relations = None
        causal_chain_structure_space = generate_chain_structure_space(
            env=env,
            actions=ACTIONS,
            attributes=attributes,
            fluents=FLUENTS,
            fluent_states=FLUENT_STATES,
            perceptually_causal_relations=perceptually_causal_relations,
            structure=structure,
        )
        write_causal_structure_space(
            causal_chain_structure_space=causal_chain_structure_space,
            causal_chain_structure_space_path=causal_chain_structure_space_path,
        )
        t = time.time()
        two_solution_schemas = AbstractSchemaStructureSpace(
            causal_chain_structure_space.structure, 2, draw_chains=False)
        write_schema_structure_space(
            schema_structure_space=two_solution_schemas,
            schema_structure_space_path=two_solution_schemas_structure_space_path,
        )
        three_solution_schemas = AbstractSchemaStructureSpace(
            causal_chain_structure_space.structure, 3, draw_chains=False)
        write_schema_structure_space(
            schema_structure_space=three_solution_schemas,
            schema_structure_space_path=three_solution_schemas_structure_space_path,
        )
        print("Schema generation time: {}s".format(time.time() - t))
        return

    interventions_predefined = []
    # interventions_predefined = [
    #     ("push_LEFT", "pull_LEFT"),    # pushing and pulling the same lever
    #     ("push_UPPER", "pull_UPPER"),  # unlocking and locking the door
    #     ("pull_LEFT", "pull_UPPER"),   # no state change
    #     ("push_LEFT", "pull_UPPER"),   # left lever state change
    #     ("push_LEFT", "push_UPPER"),   # door unlocks at the last stage
    #     ("push_LEFT", "push_UPPER"),   # door unlocks at the last stage
    #     # ("push_UPPER", "push_door"), # only solution
    # ]
    # these are used to advance to the next trial after there have no chains
    # pruned for num_steps_with_no_pruning_to_finish_trial steps
    num_steps_with_no_pruning_to_finish_trial = 500
    num_agent_runs = params["num_agent_runs"]

    # Loop-invariant: load the structure/schema spaces once, not per run.
    (
        causal_chain_structure_space,
        two_solution_schemas,
        three_solution_schemas,
    ) = load_causal_structures_from_file(
        causal_chain_structure_space_path,
        two_solution_schemas_structure_space_path,
        three_solution_schemas_structure_space_path,
    )

    for i in range(num_agent_runs):
        agent_start_time = time.time()
        env = Agent.make_env(params)
        env.lever_index_mode = "position"
        # setup agent
        agent = OpenLockLearnerAgent(
            env,
            causal_chain_structure_space,
            params,
            **{
                "two_solution_schemas": two_solution_schemas,
                "three_solution_schemas": three_solution_schemas,
            },
        )
        possible_trials = agent.get_random_order_of_possible_trials(
            params["train_scenario_name"])
        # training
        agent.training_trial_order = possible_trials
        for trial_name in possible_trials:
            (
                trial_selected,
                chain_idxs_pruned_from_initial_observation,
            ) = agent.setup_trial(
                scenario_name=params["train_scenario_name"],
                action_limit=params["train_action_limit"],
                attempt_limit=params["train_attempt_limit"],
                specified_trial=trial_name,
            )
            agent.run_trial_openlock_learner(
                trial_selected,
                num_steps_with_no_pruning_to_finish_trial,
                interventions_predefined=interventions_predefined,
                chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
                intervention_mode=params["intervention_mode"],
            )
        # testing (unconditional: train and test scenarios are the same here)
        (
            trial_selected,
            chain_idxs_pruned_from_initial_observation,
        ) = agent.setup_trial(
            scenario_name=params["test_scenario_name"],
            action_limit=params["test_action_limit"],
            attempt_limit=params["test_attempt_limit"],
        )
        agent.run_trial_openlock_learner(
            trial_selected,
            num_steps_with_no_pruning_to_finish_trial,
            interventions_predefined=interventions_predefined,
            chain_idxs_pruned_from_initial_observation=chain_idxs_pruned_from_initial_observation,
            intervention_mode=params["intervention_mode"],
        )
        agent.print_agent_summary()
        print(
            "Finished agent. Total runtime: {}s".format(time.time() - agent_start_time)
        )
        agent.finish_subject("OpenLockLearner", "OpenLockLearner")

    print(
        "Finished all agents for {}. Total runtime: {}s".format(
            param_scenario, time.time() - global_start_time
        )
    )
def replay_subject_data(subject_dir):
    """Replay a recorded subject's trials in the physics environment.

    Loads the saved subject data (either a model-agent record with an
    ``agent`` attribute, or an older human-subject record), reconstructs a
    physics-backed env, then steps through every recorded action sequence,
    rendering as it goes.

    :param subject_dir: directory containing the subject's saved data.
    :raises ValueError: for a trial whose scenario name is not CE3/CC3/CE4/CC4.

    Fixes vs. original: removed a leftover ``print('hi')`` debug statement
    at the end of the function.
    """
    subject_data = load_subject_data(subject_dir, use_json_pickle_for_trial=False)
    print("Replaying subject {}".format(subject_data.subject_id))
    # Replay should never be cut short by an attempt limit.
    max_attempts = 999999999
    # subject has agent, more recent model data
    if hasattr(subject_data, "agent"):
        train_scenario_name = subject_data.agent["params"]["train_scenario_name"]
        train_action_limit = subject_data.agent["params"]["train_action_limit"]
        train_attempt_limit = max_attempts
        test_action_limit = subject_data.agent["params"]["test_action_limit"]
        test_attempt_limit = max_attempts
        lever_index_mode = "position"
    else:
        train_scenario_name = subject_data.trial_seq[0]["scenario_name"]
        train_action_limit = 3
        train_attempt_limit = max_attempts
        test_action_limit = 3
        test_attempt_limit = max_attempts
        # human subjects use role
        lever_index_mode = "role"

    # minimal construction of params
    params = dict()
    params["use_physics"] = True
    params["train_scenario_name"] = train_scenario_name
    params["src_dir"] = None
    env = Agent.pre_instantiation_setup(params, bypass_confirmation=True)
    env.lever_index_mode = lever_index_mode

    # setup dummy window so we can start recording
    env.setup_trial(
        "CC3",
        action_limit=3,
        attempt_limit=30,
        multiproc=False,
    )
    env.reset()
    input("Press enter to start")

    # for each trial, setup the env and execute all of the action sequences
    for trial in subject_data.trial_seq:
        trial_scenario_name = trial["scenario_name"]
        # 3 lever trial
        if trial_scenario_name in ("CE3", "CC3"):
            action_limit = train_action_limit
            attempt_limit = train_attempt_limit
        # 4 lever trial
        elif trial_scenario_name in ("CE4", "CC4"):
            action_limit = test_action_limit
            attempt_limit = test_attempt_limit
            # corrects a bug where some 4 lever testing trials did not
            # properly save their trial name as a string
            if not isinstance(trial["name"], str):
                # we can directly reencode the bytes back into a python string
                _raw_dtype, raw_bytes = jsonpickle.decode(
                    json.dumps(trial["name"]["py/reduce"][1])
                )
                trial["name"] = decode_bad_jsonpickle_str(raw_bytes)
        else:
            raise ValueError("Unknown scenario name")

        # setup the env for the trial
        env.setup_trial(
            trial_scenario_name,
            action_limit=action_limit,
            attempt_limit=attempt_limit,
            specified_trial=trial["name"],
            multiproc=False,
        )
        # go through every attempt in the trial
        for attempt_seq in trial["attempt_seq"]:
            env.reset()
            # go through every action sequence in this attempt
            action_seq = attempt_seq["action_seq"]
            done = False
            action_num = 0
            # render in a loop
            while not done:
                # execute the next action if an action is not currently executing
                if not env.action_executing:
                    action_str = action_seq[action_num]["name"]
                    action_env = env.action_map[action_str]
                    env.step(action_env)
                    action_num += 1
                env.render(env)
                done = env.determine_attempt_finished()