def main(params: Parameters) -> None:
    """Render the curriculum selected in *params* to HTML.

    Output goes to ``<output_directory>/<language>/<curriculum>/``, optionally
    sorted by utterance length.
    """
    output_root = params.creatable_directory("output_directory")
    curriculum_name = params.string(
        "curriculum", valid_options=STR_TO_CURRICULUM.keys(), default="phase1"
    )
    language_mode = params.enum(
        "language_mode", LanguageMode, default=LanguageMode.ENGLISH
    )
    # e.g. LanguageMode.ENGLISH -> "english"
    language_dir_name = str(language_mode).split(".")[-1].lower()

    target_dir = output_root / language_dir_name / curriculum_name
    target_dir.mkdir(parents=True, exist_ok=True)

    # We lazily instantiate the curriculum so we don't need to worry
    # about any of them we don't actually use.
    rendered_curriculum = STR_TO_CURRICULUM[curriculum_name](
        params.optional_positive_integer("num_samples"),
        params.optional_positive_integer("num_noise_objects"),
        phase2_language_generator(language_mode),
    )

    dumper = CurriculumToHtmlDumper()
    if params.boolean("sort_by_utterance", default=False):
        dumper.dump_to_html_as_sorted_by_utterance_length(
            rendered_curriculum,
            output_directory=target_dir,
            title="GAILA Phase 1 Curriculum Sorted by Utterance Length",
            curriculum_string=curriculum_name,
            random_seed=params.integer("random_seed", default=1),
        )
    else:
        dumper.dump_to_html(
            rendered_curriculum,
            output_directory=target_dir,
            title="GAILA Phase 1 Curriculum",
        )
def build_relation_learner_factory(
    params: Parameters, beam_size: int, language_mode: LanguageMode
) -> Optional[TemplateLearner]:
    """Build the relation learner configured in *params*.

    Returns ``None`` when ``learner_type`` is ``"none"`` (relation learning
    deliberately disabled); raises ``RuntimeError`` for an unknown type.
    """
    learner_type = params.string(
        "learner_type", valid_options=["subset", "pursuit", "none"], default="subset"
    )
    # Only the ontology itself is needed here; the object list and perception
    # generator from the tuple are used by the object-learner factory.
    ontology, _, _ = ONTOLOGY_STR_TO_ONTOLOGY[
        params.string(
            "ontology", valid_options=ONTOLOGY_STR_TO_ONTOLOGY.keys(), default="phase2"
        )
    ]
    if learner_type == "subset":
        return SubsetRelationLearnerNew(
            ontology=ontology, beam_size=beam_size, language_mode=language_mode
        )
    elif learner_type == "pursuit":
        rng = random.Random()
        rng.seed(params.integer("random_seed", default=0))
        return PursuitRelationLearnerNew(
            learning_factor=params.floating_point("learning_factor"),
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold"
            ),
            lexicon_entry_threshold=params.floating_point("lexicon_entry_threshold"),
            rng=rng,
            smoothing_parameter=params.floating_point("smoothing_parameter"),
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "none":
        # We don't want to include this learner type.
        return None
    else:
        # Fixed: removed stray trailing space and include the offending value,
        # matching the style of build_object_learner_factory's error.
        raise RuntimeError(f"Relation learner type invalid: {learner_type}")
def log_experiment_entry_point(params: Parameters) -> None:
    """Run a learning experiment described entirely by *params*.

    Reads the experiment name, curriculum, learner configuration, and logging
    locations from *params*, then hands a fully-constructed ``Experiment`` to
    ``execute_experiment``.
    """
    experiment_name = params.string("experiment")
    # Hypothesis-graph rendering is enabled only when a debug directory is set.
    debug_log_dir = params.optional_creatable_directory("debug_log_directory")
    graph_logger: Optional[HypothesisLogger]
    if debug_log_dir:
        logging.info("Debug graphs will be written to %s", debug_log_dir)
        graph_logger = HypothesisLogger(debug_log_dir, enable_graph_rendering=True)
    else:
        graph_logger = None
    logger = LearningProgressHtmlLogger.create_logger(params)
    language_mode = params.enum(
        "language_mode", LanguageMode, default=LanguageMode.ENGLISH
    )
    (training_instance_groups, test_instance_groups) = curriculum_from_params(
        params, language_mode
    )
    execute_experiment(
        Experiment(
            name=experiment_name,
            training_stages=training_instance_groups,
            learner_factory=learner_factory_from_params(
                params, graph_logger, language_mode
            ),
            pre_example_training_observers=[
                logger.pre_observer(),
                CandidateAccuracyObserver("pre-acc-observer"),
            ],
            post_example_training_observers=[logger.post_observer()],
            test_instance_groups=test_instance_groups,
            test_observers=[logger.test_observer()],
            # Fixed seed so instance sequencing is reproducible across runs.
            sequence_chooser=RandomChooser.for_seed(0),
        ),
        log_path=params.optional_creatable_directory("hypothesis_log_dir"),
        log_hypotheses_every_n_examples=params.integer(
            "log_hypothesis_every_n_steps", default=250
        ),
        log_learner_state=params.boolean("log_learner_state", default=True),
        learner_logging_path=params.optional_creatable_directory(
            "experiment_group_dir"
        ),
        # NOTE(review): default -1 here differs from the 0 used by the other
        # log_experiment_entry_point variant in this file -- confirm intended.
        starting_point=params.integer("starting_point", default=-1),
        point_to_log=params.integer("point_to_log", default=0),
        load_learner_state=params.optional_existing_file("learner_state_path"),
    )
def build_pursuit_curriculum(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    pursuit_curriculum_params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:
    """Assemble the simple pursuit curriculum configured by *pursuit_curriculum_params*.

    *num_samples* / *num_noise_objects* serve only as fallback defaults when the
    corresponding parameters are absent.
    """
    cfg = pursuit_curriculum_params
    num_instances = cfg.integer(
        "num_instances", default=num_samples if num_samples else 10
    )
    num_noise_instances = cfg.integer(
        "num_noise_instances", default=num_noise_objects if num_noise_objects else 2
    )
    num_objects_in_instance = cfg.integer("num_objects_in_instance", default=3)
    add_gaze = cfg.boolean("add_gaze", default=False)

    # Probability that gaze is perceived given that gaze was / was not present.
    prob_given = cfg.floating_point("prob_given", default=1.0)
    prob_not_given = cfg.floating_point("prob_not_given", default=0.0)

    seeded_rng = random.Random()
    seeded_rng.seed(0)
    noisy_gaze_perceiver = GazePerceivedNoisily(
        rng=seeded_rng,
        prob_gaze_perceived_given_gaze=prob_given,
        prob_gaze_perceived_given_not_gaze=prob_not_given,
    )
    perception_generator = HighLevelSemanticsSituationToDevelopmentalPrimitivePerceptionGenerator(
        ontology=GAILA_PHASE_2_ONTOLOGY, gaze_strategy=noisy_gaze_perceiver
    )
    return [
        make_simple_pursuit_curriculum(
            target_objects=M6_CURRICULUM_ALL_OBJECTS,
            num_instances=num_instances,
            num_objects_in_instance=num_objects_in_instance,
            num_noise_instances=num_noise_instances,
            language_generator=language_generator,
            add_gaze=add_gaze,
            perception_generator=perception_generator,
        )
    ]
def main(params: Parameters):
    """Multiply every integer line of ``input_file`` by ``x``, one result per line.

    Writes the products to ``output_file``, then sleeps briefly so the job can
    be inspected on the cluster.
    """
    input_file_path = params.existing_file("input_file")
    output_file_path = params.creatable_file("output_file")
    x = params.integer("x")
    logging.info("Reading from input file: %s", str(input_file_path.absolute()))
    with input_file_path.open() as input_file:
        with output_file_path.open("w") as output_file:
            # int() tolerates the trailing newline on each input line.
            for num in input_file:
                output_file.write(f"{int(num)*x}\n")
    # BUG FIX: this log line previously printed the *input* path.
    logging.info("Writing to output file: %s", str(output_file_path.absolute()))
    # Pause so that we can examine the job on the SAGA cluster
    time.sleep(30)
def main(params: Parameters):
    """Randomly downsample a key-value store to at most ``num_to_sample`` entries.

    The surviving entries are written to a new zip at ``output_zip_path``.
    """
    with byte_key_value_source_from_params(params) as input_source:
        keys = list(input_source.keys())
        num_to_sample = min(params.positive_integer(_NUM_TO_SAMPLE_PARAM), len(keys))
        # BUG FIX: random.shuffle()'s `random` argument was deprecated in
        # Python 3.9 and removed in 3.11; shuffle via a seeded Random instance
        # instead. (The exact permutation for a given seed differs from the
        # old code, but the selection is arbitrary by design.)
        random.Random(params.integer(_RANDOM_SEED_PARAM, default=0)).shuffle(keys)
        keys_to_keep = keys[:num_to_sample]
        output_zip_path = params.creatable_file("output_zip_path")
        logging.info("Downsampling %s files to %s", num_to_sample, output_zip_path)
        with KeyValueSink.zip_bytes_sink(output_zip_path) as out:
            for key in keys_to_keep:
                out.put(key, input_source[key])
def build_object_learner_experiment_curriculum_train(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:
    """Build a training curriculum where a fraction of instances get *wrong* language.

    Each instance keeps its accurate description with probability
    ``accurate_language_percentage``; otherwise its language is replaced with a
    description of a randomly chosen object NOT present in the situation.
    """
    situations = make_multiple_object_situation(
        num_samples, num_noise_objects, language_generator
    )
    accurate_language_chance = params.floating_point(
        "accurate_language_percentage", default=0.5
    )
    output_situations = []
    # NOTE(review): seeds the *global* random module; other code sharing that
    # RNG in the same process will be affected.
    random.seed(params.integer("random_seed", default=0))
    rng = RandomChooser.for_seed(params.integer("language_random_seed", default=0))
    for (situation, language, perception) in situations.instances():
        if random.random() <= accurate_language_chance:
            output_language = language
        else:
            # Make Invalid Language
            if situation and isinstance(situation, HighLevelSemanticsSituation):
                # First, gather all OntologyNodes which aren't already present in the situation
                present_ontology_nodes = [
                    _object.ontology_node for _object in situation.all_objects
                ]
                valid_other_objects = [
                    node
                    for node in PHASE_1_CURRICULUM_OBJECTS
                    if node not in present_ontology_nodes
                ]
                # Then choose one at random
                chosen_ontology_node = rng.choice(valid_other_objects)
                # Make a fake situation with just this object in it, ignoring colors
                wrong_situation = HighLevelSemanticsSituation(
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    salient_objects=[
                        SituationObject.instantiate_ontology_node(
                            chosen_ontology_node, ontology=GAILA_PHASE_2_ONTOLOGY
                        )
                    ],
                    syntax_hints=[IGNORE_COLORS],
                )
                # Generate the language as if it came from this fake situation
                # rather than the original one
                fake_language = only(
                    language_generator.generate_language(wrong_situation, chooser=rng)
                )
                # Re-wrap so the instance is explicitly flagged as inaccurate.
                output_language = LinearizedDependencyTree(
                    dependency_tree=fake_language.dependency_tree,
                    surface_token_order=fake_language.surface_token_order,
                    accurate=False,
                )
            else:
                raise RuntimeError(
                    f"Unable to make invalid language without a situation of type "
                    f"HighlevelSemanticsSituation. Got situation: {situation}"
                )
        output_situations.append((situation, output_language, perception))
    return [
        AblatedLanguageSituationsInstanceGroup(
            name=f"{situations.name()}_ablated", instances=output_situations
        )
    ]
def gaze_ablation_runner_entry_point(params: Parameters) -> None:
    """Create and submit one Pegasus job per gaze-ablation parameter combination.

    Sweeps the cross product of noise-instance count, curriculum size, objects
    per instance, the two gaze-perception probabilities, and gaze on/off.
    """
    initialize_vista_pegasus_wrapper(params)
    # Get the baseline experiment parameters for gaze ablation -- these are things common to all of
    # the experiments, like:
    #
    # include_image_links: true
    # sort_learner_descriptions_by_length: True
    # num_pretty_descriptions: 5
    baseline_parameters = params.namespace("gaze_ablation")
    # get the minimum and maximum number of objects in a scene
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)
    # this gets the number of different accuracies to try; default = increment by 0.1
    num_accuracy_increments = params.integer("num_increments", default=11)
    values_for_accuracy = np.linspace(0, 1, num_accuracy_increments)
    # the number of noise instances to be included
    min_num_noise_instances = params.integer("min_num_noise", default=0)
    max_num_noise_instances = params.integer("max_num_noise", default=0)
    # get the number of instances in the entire curriculum
    min_num_instances_in_curriculum = params.integer("min_instances", default=10)
    max_num_instances_in_curriculum = params.integer("max_instances", default=20)
    # all possible numbers of noise instances
    for num_noise_instances in range(
        min_num_noise_instances, max_num_noise_instances + 1
    ):
        # all possible numbers of instances in the curriculum
        for num_instances in range(
            min_num_instances_in_curriculum, max_num_instances_in_curriculum + 1
        ):
            # all possible numbers of instances
            for num_objects_in_instance in range(min_num_objects, max_num_objects + 1):
                # all possible accuracies
                for prob_given in values_for_accuracy:
                    for prob_not_given in values_for_accuracy:
                        # both ignoring and perceiving gaze
                        for add_gaze in [True, False]:
                            # Define the experiment name, which is used both as a job name and to
                            # choose a directory in which to store the experiment results.
                            experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
                                num_instances=num_instances,
                                num_noise_instances=num_noise_instances,
                                num_objects_in_instance=num_objects_in_instance,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                                add_gaze=add_gaze,
                            )
                            experiment_name = Locator(
                                experiment_name_string.split("-")
                            )
                            # Note that the input parameters should include the root params and
                            # anything else we want.
                            experiment_params = baseline_parameters.unify(
                                FIXED_PARAMETERS
                            ).unify(
                                {
                                    "experiment": experiment_name_string,
                                    "experiment_group_dir": directory_for(
                                        experiment_name
                                    ),
                                    "hypothesis_log_dir": directory_for(experiment_name)
                                    / "hypotheses",
                                    "learner_logging_path": directory_for(
                                        experiment_name
                                    ),
                                    "log_learner_state": True,
                                    "resume_from_latest_logged_state": True,
                                    # np.linspace yields numpy floats; cast so the
                                    # params file holds plain Python floats.
                                    "pursuit-curriculum-params": {
                                        "num_instances": num_instances,
                                        "num_noise_instances": num_noise_instances,
                                        "num_objects_in_instance": num_objects_in_instance,
                                        "add_gaze": add_gaze,
                                        "prob_given": float(prob_given),
                                        "prob_not_given": float(prob_not_given),
                                    },
                                }
                            )
                            run_python_on_parameters(
                                experiment_name,
                                log_experiment_script,
                                experiment_params,
                                depends_on=[],
                            )
    write_workflow_description()
def main(
    params: Parameters,
    scenes_iterable_input: Optional[Iterable[Phase1InstanceGroup]] = None,
    output_directory: Optional[Path] = None,
    visualizer: Optional[SituationVisualizer] = None,
) -> None:
    """Render curriculum scenes in the 3D situation visualizer.

    For each scene: renders its objects, iteratively solves object positions
    (optionally showing intermediate solver steps), then screenshots and clears
    the scene. Interactive unless ``automatically_save_renderings`` is set.
    """
    language_mode = params.enum(
        "language_mode", LanguageMode, default=LanguageMode.ENGLISH
    )
    if scenes_iterable_input is None:
        scenes_iterable: Iterable[Phase1InstanceGroup] = [
            make_curriculum(None, None, phase2_language_generator(language_mode))
        ]
    else:
        scenes_iterable = scenes_iterable_input
    num_iterations = params.positive_integer("iterations")
    steps_before_vis = params.positive_integer("steps_before_vis")
    specific_scene = params.optional_positive_integer("scene")
    automatically_save_renderings = params.boolean(
        "automatically_save_renderings", default=False
    )
    # Experiment runs get stable, experiment-derived filenames.
    if "experiment_group_dir" in params:
        rendering_filename_generator = from_experiment_filename_generator
    else:
        rendering_filename_generator = default_filename_generator
    screenshot_dir = output_directory
    random.seed(params.integer("seed"))
    np.random.seed(params.integer("seed"))
    if params.string("debug_bounding_boxes", default="off") == "on":
        debug_bounding_boxes = True
    else:
        debug_bounding_boxes = False
    if params.string("gaze_arrows", default="off") == "on":
        gaze_arrows = True
    else:
        gaze_arrows = False
    # go through curriculum scenes and output geometry types
    if visualizer is None:
        viz = SituationVisualizer()
    else:
        viz = visualizer
        viz.clear_scene()
    # Apply per-object-type scale multipliers to the base model scales.
    model_scales = viz.get_model_scales()
    for object_type, multiplier in OBJECT_SCALE_MULTIPLIER_MAP.items():
        if object_type in model_scales:
            v3 = model_scales[object_type]
            new_v3 = (v3[0] * multiplier, v3[1] * multiplier, v3[2] * multiplier)
            model_scales[object_type] = new_v3
        else:
            model_scales[object_type] = (multiplier, multiplier, multiplier)
    for model_name, scale in model_scales.items():
        logging.info("SCALE: %s -> %s", model_name, scale.__str__())
    # used to start a frame from where the previous one left off
    previous_model_positions: Optional[PositionsMap] = None
    for scene_number, scene_elements in enumerate(
        SceneCreator.create_scenes(scenes_iterable)
    ):
        # If a scene number is provided in the params file, only render that scene
        # NOTE(review): a falsy/zero `specific_scene` disables both checks, so
        # scene 0 cannot be selected individually -- confirm intended.
        if specific_scene and scene_number < specific_scene:
            continue
        if specific_scene and scene_number > specific_scene:
            break
        scene_filename = rendering_filename_generator(scene_number, scene_elements)
        if scene_filename in _FILENAMES_USED:
            continue
        _FILENAMES_USED.add(scene_filename)
        print(f"SCENE {scene_number}")
        viz.set_title(
            " ".join(token for token in scene_elements.tokens)
            + " ("
            + str(scene_elements.current_frame + 1)
            + "/"
            + str(scene_elements.total_frames)
            + ")"
        )
        # if this is a new scene, forget the positions from the last scene
        if scene_elements.current_frame == 0:
            previous_model_positions = None
        if automatically_save_renderings:
            # if in auto mode and scene contains an excluded vocab word, skip it
            skip_scene = False
            for token in scene_elements.tokens:
                if token in EXCLUDED_VOCAB:
                    skip_scene = True
            if skip_scene:
                continue
        # for debugging purposes:
        # SceneCreator.graph_for_each(scene_elements.object_graph, print_obj_names)
        # bind visualizer and properties to top level rendering function:
        bound_render_obj = partial(
            render_obj, viz, scene_elements.property_map, previous_model_positions
        )
        # bind visualizer and properties to nested obj rendering function
        bound_render_nested_obj = partial(
            render_obj_nested, viz, scene_elements.property_map, previous_model_positions
        )
        # render each object in graph
        SceneCreator.graph_for_each_top_level(
            scene_elements.object_graph, bound_render_obj, bound_render_nested_obj
        )
        # apply scale to top level nodes in scene
        for node in scene_elements.object_graph:
            if (
                node.name not in OBJECT_NAMES_TO_EXCLUDE
                and node.name.split("_")[0] in OBJECT_SCALE_MULTIPLIER_MAP
            ):
                viz.multiply_scale(
                    node.name, OBJECT_SCALE_MULTIPLIER_MAP[node.name.split("_")[0]]
                )
        # find the Region relations that refer to separate objects:
        # (e.g. the cookie is in the region of the hand (of the person),
        # not the leg-segment in in the region of the torso).
        inter_object_in_region_map: DefaultDict[
            ObjectPerception, List[Region[ObjectPerception]]
        ] = defaultdict(list)
        for top_level_node in scene_elements.object_graph:
            if top_level_node.perceived_obj in scene_elements.in_region_map:
                inter_object_in_region_map[
                    top_level_node.perceived_obj
                ] = scene_elements.in_region_map[top_level_node.perceived_obj]
        # print(inter_object_in_region_map)
        # we want to assemble a lookup of the offsets (position) of each object's subobjects.
        sub_object_offsets = {}
        # Breadth-first walk of each geometry node's children.
        for node_name, node in viz.geo_nodes.items():
            child_node_to_offset = {}
            recurse_list: List[NodePath] = node.children
            while recurse_list:
                next_batch: List[NodePath] = []
                for child in recurse_list:
                    next_batch += child.children
                    # make sure this is a sub-object
                    if child.hasMat() and child.parent.name != node_name:
                        # child has non-identity transformation matrix applied to it
                        # (transform differs from parent)
                        # TODO: we could re-export all of the models in such a way to
                        #       eliminate this extra layer in the scene graph
                        child_node_to_offset[child.parent.name] = child.get_pos()
                recurse_list = next_batch
            sub_object_offsets[node_name] = child_node_to_offset
        # handle skipping scene
        if not automatically_save_renderings:
            viz.run_for_seconds(1)
            skip_command = input("type 's' and hit ENTER to skip this scene")
            if skip_command == "s":
                viz.clear_scene()
                viz.run_for_seconds(0.25)
                continue
        handle_to_in_region_map = {
            object_perception.debug_handle: region_list
            for object_perception, region_list in inter_object_in_region_map.items()
        }
        frozen_objects = objects_to_freeze(
            handle_to_in_region_map,
            scene_elements.situation,
            scene_elements.situation_object_to_handle,
        )
        if scene_elements.interpolated_scene_moving_items:
            # freeze everything not included in the interpolated scene
            frozen_objects = (
                immutableset(
                    [key.debug_handle for key in scene_elements.in_region_map.keys()]
                )
                - scene_elements.interpolated_scene_moving_items
            )
        # now that every object has been instantiated into the scene,
        # they need to be re-positioned.
        repositioned_map = None
        # The solver yields intermediate position maps every `yield_steps`
        # iterations; each is applied so the user can watch convergence.
        for repositioned_map in _solve_top_level_positions(
            top_level_objects=immutableset(
                [
                    node.perceived_obj
                    for node in scene_elements.object_graph
                    if node.name not in OBJECT_NAMES_TO_EXCLUDE
                ]
            ),
            sub_object_offsets=sub_object_offsets,
            in_region_map=inter_object_in_region_map,
            model_scales=model_scales,
            frozen_objects=frozen_objects,
            iterations=num_iterations,
            yield_steps=steps_before_vis,
            previous_positions=previous_model_positions,
        ):
            viz.clear_debug_nodes()
            viz.clear_gaze_arrows()
            if not automatically_save_renderings:
                viz.run_for_seconds(0.25)
            viz.set_positions(repositioned_map)
            if debug_bounding_boxes:
                for name in repositioned_map.name_to_position:
                    viz.add_debug_bounding_box(
                        name,
                        repositioned_map.name_to_position[name],
                        repositioned_map.name_to_scale[name],
                    )
            if gaze_arrows:
                for handle, props in scene_elements.property_map.items():
                    for prop in props:
                        if isinstance(prop, OntologyNode) and prop.handle == "gazed-at":
                            viz.add_gaze_arrow(
                                handle,
                                repositioned_map.name_to_position[handle],
                                repositioned_map.name_to_scale[handle],
                            )
            # the visualizer seems to need about a second to render an update
            if not automatically_save_renderings:
                viz.run_for_seconds(1)
            # viz.print_scene_graph()
            previous_model_positions = None
        # only store previous positions when continuing to next frame / scene
        previous_model_positions = repositioned_map
        viz.run_for_seconds(1)
        screenshot(
            automatically_save_renderings=automatically_save_renderings,
            filename=scene_filename,
            screenshot_dir=screenshot_dir,
            viz=viz,
        )
        viz.clear_scene()
        viz.run_for_seconds(0.25)
def main(params: Parameters):
    """Run (or schedule on Pegasus) the M13 experiment suite selected in *params*."""
    adam_root = params.existing_directory("adam_root")
    m13_experiments_dir = adam_root / "parameters" / "experiments" / "m13"
    use_pegasus = params.boolean("use_pegasus", default=False)
    if use_pegasus:
        initialize_vista_pegasus_wrapper(params)

    # (include-flag, flag default, param-file name); tuple order fixes run order.
    # "debug" activates a special curriculum which is meant to be edited in the
    # code by a developer to do fine-grained debugging.
    experiment_table = (
        ("include_objects", True, "objects.params"),
        ("include_imprecise_size", True, "imprecise_size.params"),
        ("include_imprecise_temporal", True, "imprecise_temporal.params"),
        ("include_subtle_verb", True, "subtle_verb.params"),
        ("include_object_restrictions", True, "object_restrictions.params"),
        ("include_functionally_defined_objects", True, "functionally_defined_objects.params"),
        ("include_relations", True, "relations.params"),
        ("include_generics", True, "generics.params"),
        ("include_verbs_with_dynamic_prepositions", True, "events_with_dynamic_prepositions.params"),
        ("include_m9_complete", False, "m9_complete.params"),
        ("include_m13_complete", False, "m13_complete.params"),
        ("include_m13_shuffled", False, "m13_shuffled.params"),
        ("include_debug", False, "debug.params"),
    )
    param_files: List[Path] = [
        m13_experiments_dir / file_name
        for flag_name, flag_default, file_name in experiment_table
        if params.boolean(flag_name, default=flag_default)
    ]

    # If any of the param files don't exist, bail out earlier instead of making
    # the user wait for the error.
    for param_file in param_files:
        if not param_file.exists():
            raise RuntimeError(f"Expected param file {param_file} does not exist")

    for param_file in param_files:
        logging.info("Running %s", param_file)
        experiment_params = YAMLParametersLoader().load(param_file)
        if not use_pegasus:
            log_experiment_entry_point(experiment_params)
        else:
            experiment_name = Locator(experiment_params.string("experiment"))
            experiment_params = experiment_params.unify(
                {
                    "experiment_group_dir": directory_for(experiment_name) / "output",
                    "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
                    # State pickles will go under experiment_name/learner_state
                    "learner_logging_path": directory_for(experiment_name),
                    "log_learner_state": True,
                    "resume_from_latest_logged_state": True,
                    "log_hypothesis_every_n_steps": params.integer(
                        "save_state_every_n_steps"
                    ),
                    "debug_learner_pickling": params.boolean(
                        "debug_learner_pickling", default=False
                    ),
                }
            )
            run_python_on_parameters(
                experiment_name, log_experiment_script, experiment_params, depends_on=[]
            )
    if use_pegasus:
        write_workflow_description()
def build_object_learner_factory(
    params: Parameters, beam_size: int, language_mode: LanguageMode
) -> TemplateLearner:
    """Build the object learner configured in *params*.

    Supports the subset, propose-but-verify, cross-situational, pursuit, and
    recognizer learners; raises ``RuntimeError`` for an unknown type.
    """
    learner_type = params.string(
        "learner_type",
        valid_options=["subset", "pbv", "cross-situational", "pursuit", "recognizer"],
        default="subset",
    )
    ontology, objects, perception_gen = ONTOLOGY_STR_TO_ONTOLOGY[
        params.string(
            "ontology", valid_options=ONTOLOGY_STR_TO_ONTOLOGY.keys(), default="phase2"
        )
    ]
    if learner_type == "subset":
        return SubsetObjectLearnerNew(
            ontology=ontology, beam_size=beam_size, language_mode=language_mode
        )
    elif learner_type == "pbv":
        # BUG FIX: was `params.optional_integer("random_seed", default=0)`;
        # optional_integer does not take a default. Use `integer` with a
        # default, matching build_relation_learner_factory.
        chooser = RandomChooser.for_seed(params.integer("random_seed", default=0))
        return ProposeButVerifyObjectLearner(
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold", default=0.8
            ),
            rng=chooser,
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "cross-situational":
        return CrossSituationalObjectLearner(
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold"
            ),
            lexicon_entry_threshold=params.floating_point("lexicon_entry_threshold"),
            smoothing_parameter=params.floating_point("smoothing_parameter"),
            expected_number_of_meanings=len(ontology.nodes_with_properties(THING)),
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "pursuit":
        rng = random.Random()
        rng.seed(params.integer("random_seed", default=0))
        return PursuitObjectLearnerNew(
            learning_factor=params.floating_point("learning_factor"),
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold"
            ),
            lexicon_entry_threshold=params.floating_point("lexicon_entry_threshold"),
            rng=rng,
            smoothing_parameter=params.floating_point("smoothing_parameter"),
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "recognizer":
        object_recognizer = ObjectRecognizer.for_ontology_types(
            objects,
            determiners=ENGLISH_DETERMINERS,
            ontology=ontology,
            language_mode=language_mode,
            perception_generator=perception_gen,
        )
        return ObjectRecognizerAsTemplateLearner(
            object_recognizer=object_recognizer, language_mode=language_mode
        )
    else:
        raise RuntimeError("Object learner type invalid")
def _load_observers_state(observers_state_path):
    """Unpickle an observers holder from *observers_state_path*.

    Returns a ``(pre, post, test)`` observer-list triple, or ``None`` if the
    file cannot be opened or unpickled (a warning is logged either way).
    """
    try:
        with observers_state_path.open("rb") as f:
            observers_holder = pickle.load(f)
        return (
            observers_holder.pre_observers,
            observers_holder.post_observers,
            observers_holder.test_observers,
        )
    except OSError:
        logging.warning(
            "Unable to open observer state at %s; skipping.", str(observers_state_path)
        )
    except pickle.UnpicklingError:
        logging.warning(
            "Couldn't unpickle observer state at %s; skipping.",
            str(observers_state_path),
        )
    return None


def _default_observers(logger, params: Parameters, experiment_group_dir):
    """Build the default ``(pre, post, test)`` observer triple from *logger*."""
    pre_observer = [
        logger.pre_observer(  # type: ignore
            params=params.namespace_or_empty("pre_observer"),
            experiment_group_dir=experiment_group_dir,
        )
    ]
    post_observer = [
        logger.post_observer(  # type: ignore
            params=params.namespace_or_empty("post_observer"),
            experiment_group_dir=experiment_group_dir,
        )
    ]
    test_observer = [
        logger.test_observer(  # type: ignore
            params=params.namespace_or_empty("test_observer"),
            experiment_group_dir=experiment_group_dir,
        )
    ]
    return pre_observer, post_observer, test_observer


def log_experiment_entry_point(params: Parameters) -> None:
    """Run a learning experiment described entirely by *params*.

    Loads (or builds) the curriculum, restores observer state when resuming,
    and hands a fully-constructed ``Experiment`` to ``execute_experiment``.

    Refactored: the three duplicated default-observer constructions and the two
    duplicated pickle-loading blocks now live in the private helpers above.
    """
    experiment_name = params.string("experiment")
    debug_log_dir = params.optional_creatable_directory("debug_log_directory")
    graph_logger: Optional[HypothesisLogger]
    if debug_log_dir:
        logging.info("Debug graphs will be written to %s", debug_log_dir)
        graph_logger = HypothesisLogger(debug_log_dir, enable_graph_rendering=True)
    else:
        graph_logger = None
    logger = LearningProgressHtmlLogger.create_logger(params)
    language_mode = params.enum(
        "language_mode", LanguageMode, default=LanguageMode.ENGLISH
    )
    # Prefer a pre-built curriculum from the repository when one is configured.
    curriculum_repository_path = params.optional_existing_directory(
        "load_from_curriculum_repository"
    )
    if curriculum_repository_path:
        curriculum = read_experiment_curriculum(
            curriculum_repository_path, params, language_mode
        )
        (training_instance_groups, test_instance_groups) = (
            curriculum.train_curriculum,
            curriculum.test_curriculum,
        )
    else:
        (training_instance_groups, test_instance_groups) = curriculum_from_params(
            params, language_mode
        )
    experiment_group_dir = params.optional_creatable_directory("experiment_group_dir")
    resume_from_last_logged_state = params.boolean(
        "resume_from_latest_logged_state", default=False
    )
    # Check if we have explicit observer states to load
    observers_state = params.optional_existing_file("observers_state_path")
    test_observer = []  # type: ignore
    pre_observer = []  # type: ignore
    post_observer = []  # type: ignore
    if resume_from_last_logged_state and observers_state:
        raise RuntimeError(
            "Can not resume from last logged state and provide explicit observer state paths"
        )
    if resume_from_last_logged_state:
        if not experiment_group_dir:
            raise RuntimeError(
                "experiment_group_dir must be specified when resume_from_last_logged_state is true."
            )
        # Try to Load Observers. As in the original code, every loadable state
        # file is applied in sequence, so the last successful load wins.
        for _, observers_state_path in observer_states_by_most_recent(
            cast(Path, experiment_group_dir) / "observer_state", "observers_state_at_"
        ):
            loaded_observers = _load_observers_state(observers_state_path)
            if loaded_observers:
                pre_observer, post_observer, test_observer = loaded_observers
        if not pre_observer and not post_observer and not test_observer:
            logging.warning("Reverting to default observers.")
            pre_observer, post_observer, test_observer = _default_observers(
                logger, params, experiment_group_dir
            )
    elif observers_state:
        loaded_observers = _load_observers_state(observers_state)
        if loaded_observers:
            pre_observer, post_observer, test_observer = loaded_observers
    else:
        pre_observer, post_observer, test_observer = _default_observers(
            logger, params, experiment_group_dir
        )
    execute_experiment(
        Experiment(
            name=experiment_name,
            training_stages=training_instance_groups,
            learner_factory=learner_factory_from_params(
                params, graph_logger, language_mode
            ),
            pre_example_training_observers=pre_observer,
            post_example_training_observers=post_observer,
            test_instance_groups=test_instance_groups,
            test_observers=test_observer,
            sequence_chooser=RandomChooser.for_seed(0),
        ),
        log_path=params.optional_creatable_directory("hypothesis_log_dir"),
        log_hypotheses_every_n_examples=params.integer(
            "log_hypothesis_every_n_steps", default=250
        ),
        log_learner_state=params.boolean("log_learner_state", default=True),
        learner_logging_path=experiment_group_dir,
        starting_point=params.integer("starting_point", default=0),
        point_to_log=params.integer("point_to_log", default=0),
        load_learner_state=params.optional_existing_file("learner_state_path"),
        resume_from_latest_logged_state=resume_from_last_logged_state,
        debug_learner_pickling=params.boolean("debug_learner_pickling", default=False),
    )
def integrated_experiment_entry_point(params: Parameters) -> None:
    """Pegasus entry point for the integrated-learners experiment.

    Schedules one curriculum-generation job per (add_noise, shuffled)
    combination, then one learner-experiment job per generated curriculum
    (depending on its generation job), and finally writes the workflow
    description for submission.

    Args:
        params: Root parameters; must contain the
            `integrated_learners_experiment` and `pursuit_resource_request`
            namespaces plus the keys read below.
    """
    initialize_vista_pegasus_wrapper(params)

    baseline_parameters = params.namespace("integrated_learners_experiment")
    pursuit_resource_request_params = params.namespace("pursuit_resource_request")

    # This code is commented out but may be used in the near future to add
    # language ablation capabilities to this curriculum.
    # get the minimum and maximum accuracy of the language with the situation
    # min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    # max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    # num_language_accuracy_increment = params.integer(
    #     "num_language_accuracy_increment", default=5
    # )
    # values_for_accuracy = np.linspace(
    #     min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    # )

    # Whether attributes or relations should be included in the curriculum.
    include_attributes = params.boolean("include_attributes", default=True)
    include_relations = params.boolean("include_relations", default=True)

    limit_jobs_for_category(
        "pursuit_job_limit", params.integer("num_pursuit_learners_active", default=8)
    )
    curriculum_repository_path = params.creatable_directory(
        "curriculum_repository_path"
    )

    # Jobs to build the desired curriculum(s) which our learners use.
    # Each entry is (curriculum name, generation-job dependency, curriculum params).
    curriculum_dependencies = immutableset(
        (
            CURRICULUM_NAME_FORMAT.format(
                noise=add_noise,
                shuffled=shuffle,
                relations=include_relations,
                attributes=include_attributes,
            ),
            run_python_on_parameters(
                Locator(
                    CURRICULUM_NAME_FORMAT.format(
                        noise=add_noise,
                        shuffled=shuffle,
                        relations=include_relations,
                        attributes=include_attributes,
                    ).split("-")
                ),
                generate_curriculum_script,
                baseline_parameters.unify(
                    {
                        "train_curriculum": Parameters.from_mapping(CURRICULUM_PARAMS)
                        .unify(
                            {
                                "add_noise": add_noise,
                                "shuffled": shuffle,
                                "include_attributes": include_attributes,
                                "include_relations": include_relations,
                            }
                        )
                        .as_mapping()
                    }
                )
                .unify(FIXED_PARAMETERS)
                .unify({"curriculum_repository_path": curriculum_repository_path}),
                depends_on=[],
            ),
            Parameters.from_mapping(CURRICULUM_PARAMS).unify(
                {
                    "add_noise": add_noise,
                    "shuffled": shuffle,
                    "include_attributes": include_attributes,
                    "include_relations": include_relations,
                }
            ),
        )
        for add_noise in (True, False)
        for shuffle in (True, False)
    )

    # Jobs to build the experiments over each curriculum.
    for (curriculum_str, curriculum_dep, curr_params) in curriculum_dependencies:
        object_learner_type = params.string(
            "object_learner.learner_type",
            valid_options=["pursuit", "subset", "pbv"],
            default="pursuit",
        )
        # BUG FIX: this key previously read "attribute_learner.learner__type"
        # (double underscore), so a configured attribute learner type was
        # silently ignored and the default was always used.
        attribute_learner_type = params.string(
            "attribute_learner.learner_type",
            valid_options=["none", "pursuit", "subset"],
            default="pursuit",
        )
        relation_learner_type = params.string(
            "relation_learner.learner_type",
            valid_options=["none", "pursuit", "subset"],
            default="pursuit",
        )

        experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
            curriculum_name=curriculum_str.replace("-", "+"),
            object_learner=object_learner_type,
            attribute_learner=attribute_learner_type,
            relation_learner=relation_learner_type,
        )
        experiment_name = Locator(experiment_name_string.split("-"))

        # Note that the input parameters should include the root params and
        # anything else we want.
        experiment_params = baseline_parameters.unify(FIXED_PARAMETERS).unify(
            {
                "experiment": experiment_name_string,
                "experiment_group_dir": directory_for(experiment_name),
                "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
                "learner_logging_path": directory_for(experiment_name),
                "log_learner_state": True,
                "resume_from_latest_logged_state": True,
                "load_from_curriculum_repository": curriculum_repository_path,
                "train_curriculum": curr_params,
            }
        )

        # Hoisted: this membership test was previously computed twice
        # (once for the resource request, once for the category).
        uses_pursuit = "pursuit" in (
            object_learner_type,
            attribute_learner_type,
            relation_learner_type,
        )
        run_python_on_parameters(
            experiment_name,
            log_experiment_script,
            experiment_params,
            depends_on=[curriculum_dep],
            # Pursuit learners are resource-hungry, so they get a dedicated
            # Slurm resource request and the throttled "pursuit" job category.
            resource_request=SlurmResourceRequest.from_parameters(
                pursuit_resource_request_params
            )
            if uses_pursuit
            else None,
            category="pursuit" if uses_pursuit else "subset",
            use_pypy=True,
        )

    write_workflow_description()
def create_gaze_ablation_entry_point(params: Parameters) -> None:
    """Write a parameter file for every gaze-ablation configuration in range.

    Iterates the full cross product of noise-instance counts, curriculum
    sizes, objects-per-instance counts, gaze accuracies (``prob_given`` x
    ``prob_not_given``), and gaze on/off, writing one param file per
    combination into ``parameters_directory``.

    Args:
        params: Must supply ``parameters_directory``; all range parameters
            have defaults.

    Raises:
        RuntimeError: If ``parameters_directory`` resolves to a falsy value.
    """
    # get the parameters directory, which must be non-null
    parameters_dir = params.creatable_directory("parameters_directory")
    if not parameters_dir:
        raise RuntimeError(
            "Must specify a directory where you wish to write your param files"
        )
    # get the minimum and maximum number of objects in a scene
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)
    # this gets the number of different accuracies to try; default = increment by 0.1
    num_accuracy_increments = params.integer("num_increments", default=11)
    values_for_accuracy = np.linspace(0, 1, num_accuracy_increments)
    # the number of noise instances to be included
    min_num_noise_instances = params.integer("min_num_noise", default=0)
    max_num_noise_instances = params.integer("max_num_noise", default=0)
    # get the number of instances in the entire curriculum
    min_num_instances_in_curriculum = params.integer("min_instances", default=10)
    max_num_instances_in_curriculum = params.integer("max_instances", default=20)

    # all possible numbers of noise instances
    for num_noise_instances in range(
        min_num_noise_instances, max_num_noise_instances + 1
    ):
        # all possible numbers of instances in the curriculum
        for num_instances in range(
            min_num_instances_in_curriculum, max_num_instances_in_curriculum + 1
        ):
            # all possible numbers of objects in an instance
            for num_objects_in_instance in range(min_num_objects, max_num_objects + 1):
                # all possible accuracies
                for prob_given in values_for_accuracy:
                    for prob_not_given in values_for_accuracy:
                        # both ignoring and perceiving gaze
                        for add_gaze in (True, False):
                            # the required arguments make a unique filename
                            file_name = FILE_NAME_STRING.format(
                                num_instances=num_instances,
                                num_noise_instances=num_noise_instances,
                                num_objects_in_instance=num_objects_in_instance,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                                add_gaze=add_gaze,
                            )
                            # format the arguments in the parameter file
                            param_file_string = PARAM_FILE_STRING.format(
                                experiment=file_name,
                                num_instances=num_instances,
                                num_noise_instances=num_noise_instances,
                                num_objects_in_instance=num_objects_in_instance,
                                add_gaze=add_gaze,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                            )
                            # BUG FIX: previously opened with mode "a" via an
                            # f-string path, so re-running the script appended
                            # a duplicate copy of the params to each existing
                            # file. Overwrite instead, using a pathlib join.
                            (parameters_dir / file_name).write_text(param_file_string)
def object_language_ablation_runner_entry_point(params: Parameters) -> None:
    """This function creates all possible object language ablation param files within a given range"""
    initialize_vista_pegasus_wrapper(params)

    baseline_parameters = params.namespace("object_language_ablation")
    pursuit_resource_request_params = params.namespace("pursuit_resource_request")

    # Scene-size range to sweep over.
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)

    # Range of language accuracy relative to the situation.
    min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    num_language_accuracy_increment = params.integer(
        "num_language_accuracy_increment", default=5
    )
    values_for_accuracy = np.linspace(
        min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    )

    limit_jobs_for_category(
        "pursuit", params.integer("num_pursuit_learners_active", default=8)
    )

    # One experiment job per (scene size, language accuracy, learner config).
    for num_objects in range(min_num_objects, max_num_objects + 1):
        for language_accuracy in values_for_accuracy:
            for learner_type in LEARNER_VALUES_TO_PARAMS:
                for params_str, learner_params in LEARNER_VALUES_TO_PARAMS[
                    learner_type
                ]:
                    experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
                        num_objects=num_objects,
                        language_accuracy=language_accuracy,
                        learner_type=learner_type,
                        learner_params=params_str,
                    )
                    experiment_name = Locator(experiment_name_string.split("-"))

                    # Note that the input parameters should include the root
                    # params and anything else we want.
                    experiment_overrides = {
                        "experiment": experiment_name_string,
                        "experiment_group_dir": directory_for(experiment_name),
                        "hypothesis_log_dir": directory_for(experiment_name)
                        / "hypotheses",
                        "learner_logging_path": directory_for(experiment_name),
                        "log_learner_state": True,
                        "resume_from_latest_logged_state": True,
                        "train_curriculum": {
                            "accurate_language_percentage": float(language_accuracy)
                        },
                        "object_learner_type": learner_type,
                        "object_learner": learner_params,
                        # We subtract one because the target object is a given
                        "num_noise_objects": num_objects - 1,
                    }
                    experiment_params = baseline_parameters.unify(
                        FIXED_PARAMETERS
                    ).unify(experiment_overrides)

                    # Only pursuit learners get the dedicated resource request.
                    is_pursuit = learner_type == "pursuit"
                    run_python_on_parameters(
                        experiment_name,
                        log_experiment_script,
                        experiment_params,
                        depends_on=[],
                        resource_request=SlurmResourceRequest.from_parameters(
                            pursuit_resource_request_params
                        )
                        if is_pursuit
                        else None,
                        category=learner_type,
                    )

    write_workflow_description()
def integrated_pursuit_learner_experiment_curriculum(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:
    """Build the m18 integrated-learners training curriculum.

    Generates object, attribute (color), and prepositional-relation
    sub-curricula (the latter two can be disabled via ``include_attributes``
    / ``include_relations``), optionally with noise objects and relations,
    converts everything to explicit instances, and optionally shuffles them.

    Args:
        num_samples: Samples per curriculum template; defaults to 50 if None.
        num_noise_objects: Default upper bound for noise objects (10 if None).
        language_generator: Generates linguistic descriptions for situations.
        params: Curriculum configuration (noise ranges, seeds, toggles).

    Returns:
        A single-element sequence holding the explicit instance group.
    """
    # Load Parameters
    add_noise = params.boolean("add_noise", default=False)
    block_multiple_of_same_type = params.boolean(
        "block_multiple_of_same_type", default=True
    )
    include_targets_in_noise = params.boolean("include_targets_in_noise", default=False)

    min_noise_objects = params.integer("min_noise_objects", default=0)
    max_noise_objects = params.integer(
        "max_noise_objects", default=num_noise_objects if num_noise_objects else 10
    )
    min_noise_relations = params.integer("min_noise_relations", default=0)
    max_noise_relations = params.integer("max_noise_relations", default=5)

    # This value ensures that pursuit gets at least 6 instances of any example,
    # as otherwise the lexicalization system might not lexicalize it. But if
    # there are lots of noise variants we don't want thousands of examples, as
    # could happen combinatorially, so the 6 is divided across the variants.
    # BUG FIX: the denominator previously read
    #   max_noise_relations - min_noise_relations + min_noise_objects - max_noise_objects
    # i.e. the object span was SUBTRACTED rather than added, making the
    # denominator negative for the defaults (5 + 0 - 10 = -5) and raising
    # ZeroDivisionError whenever the two spans cancel. With the default
    # settings the corrected formula still yields 1, as before.
    noise_variant_count = (max_noise_relations - min_noise_relations) + (
        max_noise_objects - min_noise_objects
    )
    min_samples_per_noise_object_relation_pair = (
        max(6 // noise_variant_count, 1)
        if add_noise and noise_variant_count > 0
        else 6
    )

    if num_samples is None:
        num_samples = 50

    # Random Number Generator for Curriculum Use
    rng = random.Random()
    rng.seed(params.integer("random_seed", default=0))
    # Random Chooser for Curriculum Generation
    chooser = RandomChooser.for_seed(params.integer("chooser_seed", default=0))

    # Noise elements: one candidate set of i noise objects for each size i in
    # [min_noise_objects, max_noise_objects).
    noise_objects_sets: ImmutableSet[ImmutableSet[TemplateObjectVariable]] = immutableset(
        [
            immutableset(
                [
                    standard_object(
                        f"{i}_noise_object_{num}",
                        THING,
                        required_properties=[INTEGRATED_EXPERIMENT_PROP],
                    )
                    for num in range(i)
                ]
            )
            for i in range(min_noise_objects, max_noise_objects)
        ]
    )
    # Reset to the no-noise collection when noise is disabled or nothing was
    # generated. (Previously this tested `noise_objects_sets.empty()`;
    # `not noise_objects_sets` is the unambiguous emptiness test and is
    # equivalent here in every reachable case.)
    if not noise_objects_sets or not add_noise:
        noise_objects_sets = immutableset(immutableset())

    target_objects = [
        standard_object(node.handle, node)
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
    ]
    # Colored variants of the targets, excluding nodes that don't take colors.
    target_color_objects = [
        standard_object(f"{node.handle}_{color.handle}", node, added_properties=[color])
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
        for color in INTEGRATED_EXPERIMENT_COLORS
        if node not in [ZUP, SPAD, DAYGIN, MAWG, TOMBUR, GLIM]
    ]

    # We use a max of 1 here to account for when noise values are not used as
    # otherwise we'd be multiplying by 0 and cause div by 0 errors
    samples_to_template_den = (
        len(target_objects)
        * max(len(noise_objects_sets), 1)
        * max((max_noise_relations - min_noise_relations), 1)
    )

    ordered_curriculum = [
        _single_object_described_curriculum(
            num_samples,
            target_objects,
            noise_objects_sets,
            min_noise_relations=min_noise_relations,
            max_noise_relations=max_noise_relations,
            add_noise=add_noise,
            chooser=chooser,
            samples_to_template_den=samples_to_template_den,
            block_multiple_of_same_type=block_multiple_of_same_type,
            language_generator=language_generator,
            include_targets_in_noise=include_targets_in_noise,
            min_samples=min_samples_per_noise_object_relation_pair,
        )
    ]
    if params.boolean("include_attributes", default=True):
        ordered_curriculum.append(
            _single_attribute_described_curriculum(
                num_samples,
                target_color_objects,
                noise_objects_sets,
                min_noise_relations=min_noise_relations,
                max_noise_relations=max_noise_relations,
                add_noise=add_noise,
                chooser=chooser,
                samples_to_template_den=samples_to_template_den,
                block_multiple_of_same_type=block_multiple_of_same_type,
                language_generator=language_generator,
                include_targets_in_noise=include_targets_in_noise,
                min_samples=min_samples_per_noise_object_relation_pair,
            )
        )
    if params.boolean("include_relations", default=True):
        ordered_curriculum.append(
            _prepositional_relation_described_curriculum(
                num_samples,
                noise_objects_sets,
                min_noise_relations=min_noise_relations,
                max_noise_relations=max_noise_relations,
                add_noise=add_noise,
                chooser=chooser,
                samples_to_template_den=samples_to_template_den,
                block_multiple_of_same_type=block_multiple_of_same_type,
                language_generator=language_generator,
                include_targets_in_noise=include_targets_in_noise,
                min_samples=min_samples_per_noise_object_relation_pair,
            )
        )

    # Convert the 'from situation instances' into explicit instances. This
    # 1) saves computation time when the learner experiment generates the
    #    perception graphs, and
    # 2) allows us to shuffle the output order, which we otherwise can't do.
    explicit_instances = [
        instance for sit in ordered_curriculum for instance in sit.instances()
    ]
    return [
        ExplicitWithSituationInstanceGroup(
            name="m18-integrated-learners-experiment",
            instances=tuple(shuffle_curriculum(explicit_instances, rng=rng))
            if params.boolean("shuffled", default=False)
            else tuple(explicit_instances),
        )
    ]
def integrated_pursuit_learner_experiment_test(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:  # pylint: disable=unused-argument
    """Build the noise-free m18 integrated-learners test curriculum.

    Mirrors the training curriculum but with noise disabled, its own seeds,
    and a small default sample count.
    """
    # Deterministic randomness for shuffling and template choice.
    shuffle_rng = random.Random()
    shuffle_rng.seed(params.integer("random_seed", default=1))
    chooser = RandomChooser.for_seed(params.integer("chooser_seed", default=1))

    num_samples = 5 if num_samples is None else num_samples

    excluded_color_nodes = [ZUP, SPAD, DAYGIN, MAWG, TOMBUR, GLIM]
    target_objects = [
        standard_object(node.handle, node)
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
    ]
    target_color_objects = [
        standard_object(f"{node.handle}_{color.handle}", node, added_properties=[color])
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
        for color in INTEGRATED_EXPERIMENT_COLORS
        if node not in excluded_color_nodes
    ]

    # The test curriculum uses no noise object sets.
    empty_noise = immutableset(immutableset())

    curricula = [
        _single_object_described_curriculum(
            num_samples,
            target_objects,
            empty_noise,
            add_noise=False,
            chooser=chooser,
            block_multiple_of_same_type=True,
            language_generator=language_generator,
            min_samples=num_samples,
        )
    ]
    if params.boolean("include_attributes", default=True):
        curricula.append(
            _single_attribute_described_curriculum(
                num_samples,
                target_color_objects,
                empty_noise,
                add_noise=False,
                chooser=chooser,
                block_multiple_of_same_type=True,
                language_generator=language_generator,
                min_samples=num_samples,
            )
        )
    if params.boolean("include_relations", default=True):
        curricula.append(
            _prepositional_relation_described_curriculum(
                num_samples,
                empty_noise,
                add_noise=False,
                chooser=chooser,
                block_multiple_of_same_type=True,
                language_generator=language_generator,
            )
        )

    # Convert the 'from situation' instances into explicit instances. This
    # 1) saves computation when the learner experiment generates perception
    #    graphs, and
    # 2) lets us shuffle the output order, which we otherwise couldn't do.
    flattened_instances = [
        instance for curriculum in curricula for instance in curriculum.instances()
    ]
    shuffled = params.boolean("shuffled", default=False)
    return [
        ExplicitWithSituationInstanceGroup(
            name="m18-integrated-learners-experiment-test",
            instances=tuple(shuffle_curriculum(flattened_instances, rng=shuffle_rng))
            if shuffled
            else tuple(flattened_instances),
        )
    ]
def from_parameters(params: Parameters) -> "TestObj": return TestObj(params.integer("my_int"))