def add_saga_cluster_to_sites(
    sites_catalog: SiteCatalog, params: Parameters = Parameters.empty()
) -> None:
    """
    Register the "saga" SLURM cluster as a Pegasus site on *sites_catalog*.

    Reads ``home_dir``, ``data_configuration``, and ``saga_shared_scratch`` from
    *params*, falling back to defaults under the user's home directory.
    """
    home_directory = params.string("home_dir", default=str(Path.home().absolute()))
    data_config = params.string("data_configuration", default="sharedfs")
    scratch_path = params.string(
        "saga_shared_scratch", default=f"{home_directory}/workflows/shared-scratch"
    )

    saga_site = Site("saga", arch=Arch.X86_64, os_type=OS.LINUX)
    # Shared scratch space served over file:// on the cluster's shared filesystem.
    scratch_directory = Directory(Directory.SHARED_SCRATCH, scratch_path)
    scratch_directory.add_file_servers(FileServer("file://" + scratch_path, Operation.ALL))
    saga_site.add_directories(scratch_directory)
    saga_site.add_env(
        key="PEGASUS_HOME", value="/nas/gaia/shared/cluster/pegasus5/pegasus-5.0.0"
    )
    # Profiles ("auxillary" spelling is Pegasus's own profile key)
    saga_site.add_pegasus_profile(
        style="glite", auxillary_local=True, data_configuration=data_config
    )
    saga_site.add_condor_profile(grid_resource="batch slurm")
    sites_catalog.add_sites(saga_site)
def build_relation_learner_factory(
        params: Parameters, beam_size: int,
        language_mode: LanguageMode) -> Optional[TemplateLearner]:
    """
    Build a relation learner from ``learner_type`` ("subset", "pursuit", or
    "none") and the ``ontology`` parameter.

    Returns None when ``learner_type`` is "none".
    """
    learner_type = params.string("learner_type",
                                 valid_options=["subset", "pursuit", "none"],
                                 default="subset")
    ontology, _, _ = ONTOLOGY_STR_TO_ONTOLOGY[params.string(
        "ontology",
        valid_options=ONTOLOGY_STR_TO_ONTOLOGY.keys(),
        default="phase2")]
    if learner_type == "subset":
        return SubsetRelationLearnerNew(ontology=ontology,
                                        beam_size=beam_size,
                                        language_mode=language_mode)
    elif learner_type == "pursuit":
        rng = random.Random()
        rng.seed(params.integer("random_seed", default=0))
        return PursuitRelationLearnerNew(
            learning_factor=params.floating_point("learning_factor"),
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold"),
            lexicon_entry_threshold=params.floating_point(
                "lexicon_entry_threshold"),
            rng=rng,
            smoothing_parameter=params.floating_point("smoothing_parameter"),
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "none":
        # We don't want to include this learner type.
        return None
    else:
        # Unreachable while valid_options covers every branch, but kept defensive.
        # Fix: message previously had a trailing space and omitted the bad value.
        raise RuntimeError(f"Relation learner type invalid: {learner_type}")
def pre_observer(
    self,
    *,
    params: Parameters = Parameters.empty(),
    experiment_group_dir: Optional[Path] = None,
) -> "DescriptionObserver":  # type: ignore
    """
    Build the pre-training HTML observer, optionally wrapping accuracy and
    precision/recall sub-observers controlled by *params*.
    """
    track_accuracy = params.boolean("include_acc_observer", default=False)
    log_accuracy = params.boolean("accuracy_to_txt", default=False)
    log_accuracy_path = params.string(
        "accuracy_logging_path",
        default=f"{experiment_group_dir}/accuracy_pre_out.txt"
        if experiment_group_dir else "accuracy_pre_out.txt",
    )
    track_precision_recall = params.boolean("include_pr_observer", default=False)
    log_precision_recall = params.boolean("log_pr", default=False)
    # NOTE(review): default filename says "pr_post_out.txt" even though this is
    # the PRE observer — looks copy-pasted from the post observer; confirm intent.
    log_precision_recall_path = params.string(
        "pr_log_path",
        default=f"{experiment_group_dir}/pr_post_out.txt"
        if experiment_group_dir else "pr_post_out.txt",
    )

    accuracy_observer = None
    if track_accuracy:
        accuracy_observer = CandidateAccuracyObserver(
            name="Pre-observer-acc",
            accuracy_to_txt=log_accuracy,
            txt_path=log_accuracy_path,
        )

    precision_recall_observer = None
    if track_precision_recall:
        precision_recall_observer = PrecisionRecallObserver(
            name="Pre-observer-pr",
            make_report=log_precision_recall,
            txt_path=log_precision_recall_path,
        )

    return HTMLLoggerPreObserver(
        name="Pre-observer",
        html_logger=self,
        candidate_accuracy_observer=accuracy_observer,
        precision_recall_observer=precision_recall_observer,
    )
def from_parameters(params: Parameters) -> ResourceRequest:
    """
    Build a SlurmResourceRequest from *params*.

    ``memory`` is parsed only when present; all other resource fields are
    optional integers.
    """
    if "memory" in params:
        memory_amount = MemoryAmount.parse(params.string("memory"))
    else:
        memory_amount = None
    return SlurmResourceRequest(
        partition=params.string("partition"),
        num_cpus=params.optional_positive_integer("num_cpus"),
        num_gpus=params.optional_integer("num_gpus"),
        memory=memory_amount,
        job_time_in_minutes=params.optional_integer("job_time_in_minutes"),
    )
def from_parameters(params: Parameters) -> Optional["SpackConfiguration"]:
    """
    Build a SpackConfiguration from *params*, or return None when neither a
    Spack environment nor Spack packages are configured.

    Raises RuntimeError if both the environment and packages parameters are
    given (they are mutually exclusive).
    """
    has_environment = SpackConfiguration.SPACK_ENVIRONMENT_PARAM in params
    has_packages = SpackConfiguration.SPACK_PACKAGES_PARAM in params
    # Fix: the mutual-exclusion check was duplicated in both branches; the
    # second copy was unreachable. Check it once up front instead.
    if has_environment and has_packages:
        raise RuntimeError(
            f"{SpackConfiguration.SPACK_ENVIRONMENT_PARAM} "
            f"and {SpackConfiguration.SPACK_PACKAGES_PARAM} are mutually exclusive"
        )
    if has_environment:
        return SpackConfiguration(
            spack_root=params.existing_directory(
                SpackConfiguration.SPACK_ROOT_PARAM),
            spack_environment=params.string(
                SpackConfiguration.SPACK_ENVIRONMENT_PARAM),
        )
    if has_packages:
        return SpackConfiguration(
            spack_root=params.existing_directory(
                SpackConfiguration.SPACK_ROOT_PARAM),
            spack_packages=[
                SpackPackage.parse(package_specifier)
                for package_specifier in params.arbitrary_list(
                    SpackConfiguration.SPACK_PACKAGES_PARAM)
            ],
        )
    return None
def main(params: Parameters) -> None:
    """
    Render the curriculum selected by the "curriculum" parameter to HTML under
    ``output_directory / <language> / <curriculum>``.
    """
    root_output_directory = params.creatable_directory("output_directory")
    curriculum_string = params.string(
        "curriculum", valid_options=STR_TO_CURRICULUM.keys(), default="phase1"
    )
    language_mode = params.enum(
        "language_mode", LanguageMode, default=LanguageMode.ENGLISH
    )
    language_string = str(language_mode).split(".")[-1].lower()
    num_samples = params.optional_positive_integer("num_samples")
    num_noise_objects = params.optional_positive_integer("num_noise_objects")

    curriculum_output_dir = root_output_directory / language_string / curriculum_string
    curriculum_output_dir.mkdir(parents=True, exist_ok=True)

    # Curricula are instantiated lazily so unused ones are never built.
    curriculum_to_render = STR_TO_CURRICULUM[curriculum_string](
        num_samples, num_noise_objects, phase2_language_generator(language_mode)
    )

    if params.boolean("sort_by_utterance", default=False):
        seed = params.integer("random_seed", default=1)
        CurriculumToHtmlDumper().dump_to_html_as_sorted_by_utterance_length(
            curriculum_to_render,
            output_directory=curriculum_output_dir,
            title="GAILA Phase 1 Curriculum Sorted by Utterance Length",
            curriculum_string=curriculum_string,
            random_seed=seed,
        )
    else:
        CurriculumToHtmlDumper().dump_to_html(
            curriculum_to_render,
            output_directory=curriculum_output_dir,
            title="GAILA Phase 1 Curriculum",
        )
def main(params: Parameters):
    """
    Generate a conda job shell script for the configured entry point and write
    it to ``conda_script_path`` (optionally echoing it to stdout).
    """
    conda_script_generator = CondaJobScriptGenerator.from_parameters(params)
    entry_point = params.string("entry_point")
    work_dir = params.optional_creatable_directory(
        "working_directory") or Path(os.getcwd())
    # Fix: params.string() raises when "log_file" is absent, so the `or`
    # fallback below was dead code; optional_string returns None instead.
    stdout_file = params.optional_string("log_file") or work_dir / "___stdout.log"
    shell_script = conda_script_generator.generate_shell_script(
        entry_point_name=entry_point,
        param_file=params.existing_file("job_param_file"),
        working_directory=work_dir,
        stdout_file=stdout_file,
    )
    params.creatable_file("conda_script_path").write_text(  # type: ignore
        shell_script, encoding="utf-8")
    if params.boolean("echo_template", default=False):
        print(shell_script)
def from_parameters(params: Parameters) -> Optional["CondaConfiguration"]:
    """
    Build a CondaConfiguration from *params*, or return None when no conda
    environment is configured.
    """
    # Guard clause: nothing to configure without an environment name.
    if CondaConfiguration.CONDA_ENVIRONMENT_PARAM not in params:
        return None
    return CondaConfiguration(
        conda_base_path=params.existing_directory("conda_base_path"),
        conda_environment=params.string(
            CondaConfiguration.CONDA_ENVIRONMENT_PARAM),
    )
def test_observer(
    self,
    *,
    params: Parameters = Parameters.empty(),
    experiment_group_dir: Optional[Path] = None,
) -> "DescriptionObserver":  # type: ignore
    """
    Build the post-test HTML observer, optionally wrapping accuracy and
    precision/recall sub-observers controlled by *params*.
    """
    # These params control writing accuracy to a text file at every iteration
    # (e.g. to graph later).
    track_accuracy = params.boolean("include_acc_observer", default=True)
    log_accuracy = params.boolean("accuracy_to_txt", default=False)
    log_accuracy_path = params.string(
        "accuracy_logging_path",
        default=f"{experiment_group_dir}/accuracy_test_out.txt"
        if experiment_group_dir else "accuracy_test_out.txt",
    )
    track_precision_recall = params.boolean("include_pr_observer", default=False)
    log_precision_recall = params.boolean("log_pr", default=False)
    log_precision_recall_path = params.string(
        "pr_log_path",
        default=f"{experiment_group_dir}/pr_test_out.txt"
        if experiment_group_dir else "pr_test_out.txt",
    )
    return HTMLLoggerPostObserver(
        name="t-observer",
        html_logger=self,
        candidate_accuracy_observer=CandidateAccuracyObserver(
            name="Test-observer-acc",
            accuracy_to_txt=log_accuracy,
            txt_path=log_accuracy_path,
        ) if track_accuracy else None,
        precision_recall_observer=PrecisionRecallObserver(
            name="Test-observer-pr",
            make_report=log_precision_recall,
            txt_path=log_precision_recall_path,
        ) if track_precision_recall else None,
        test_mode=True,
    )
def build_plural_learner_factory(
        params: Parameters, beam_size: int,
        language_mode: LanguageMode) -> Optional[TemplateLearner]:
    """
    Build a plural learner from ``learner_type`` ("subset" or "none") and the
    ``ontology`` parameter.

    Returns None when ``learner_type`` is "none".
    """
    learner_type = params.string("learner_type",
                                 valid_options=["subset", "none"],
                                 default="subset")
    ontology, _, _ = ONTOLOGY_STR_TO_ONTOLOGY[params.string(
        "ontology",
        valid_options=ONTOLOGY_STR_TO_ONTOLOGY.keys(),
        default="phase2")]
    if learner_type == "subset":
        return SubsetPluralLearnerNew(ontology=ontology,
                                      beam_size=beam_size,
                                      language_mode=language_mode)
    elif learner_type == "none":
        # We don't want to include this learner type.
        return None
    else:
        # Unreachable while valid_options covers every branch, but kept defensive.
        # Fix: message previously had a trailing space and omitted the bad value.
        raise RuntimeError(f"Plural learner type invalid: {learner_type}")
def add_local_nas_to_sites(
    sites_catalog: SiteCatalog, params: Parameters = Parameters.empty()
) -> None:
    """
    Register the "local" Pegasus site with shared-scratch and local-storage
    directories, read from ``local_shared_scratch`` and ``local_storage``.
    """
    home = params.string("home_dir", default=str(Path.home().absolute()))
    scratch_path = params.string("local_shared_scratch",
                                 default=f"{home}/workflows/scratch")
    storage_path = params.string("local_storage",
                                 default=f"{home}/workflows/output")

    local_site = Site("local", arch=Arch.X86_64, os_type=OS.LINUX)
    local_site.add_directories(
        Directory(Directory.SHARED_SCRATCH, scratch_path).add_file_servers(
            FileServer("file://" + scratch_path, Operation.ALL)),
        Directory(Directory.LOCAL_STORAGE, storage_path).add_file_servers(
            FileServer("file://" + storage_path, Operation.ALL)),
    )
    sites_catalog.add_sites(local_site)
def from_parameters(params: Parameters) -> "WorkflowBuilder":
    """
    Construct a WorkflowBuilder from *params*, starting with a fresh (empty)
    replica catalog file ``rc.dat`` inside the workflow directory.
    """
    workflow_directory = params.creatable_directory("workflow_directory")
    # Reset the replica catalog: delete any stale one, then create it empty.
    rc_path = workflow_directory / "rc.dat"
    if rc_path.exists():
        rc_path.unlink()
    rc_path.touch(mode=0o744)
    return WorkflowBuilder(
        name=params.string("workflow_name", default="Workflow"),
        created_by=params.string("workflow_created",
                                 default="Default Constructor"),
        workflow_directory=workflow_directory,
        default_site=params.string("site"),
        conda_script_generator=CondaJobScriptGenerator.from_parameters(params),
        namespace=params.string("namespace"),
        default_resource_request=ResourceRequest.from_parameters(params),
        replica_catalog=rc_path,
    )
def from_parameters(params: Parameters) -> "ResourceRequest":
    """
    Create a ResourceRequest from a given parameter file.

    Current valid backend param values: "slurm"
    """
    backend = params.string(_BACKEND_PARAM,
                            valid_options=["slurm"],
                            default="slurm")
    # Guard clause: reject anything other than the one supported backend.
    if backend != "slurm":
        raise RuntimeError(f"Invalid backend option {backend}")
    return SlurmResourceRequest.from_parameters(params)
def from_parameters(params: Parameters) -> "WorkflowBuilder":
    """
    Construct a WorkflowBuilder from *params* and register the NAS and SAGA
    sites on it unless disabled via ``include_nas`` / ``include_saga``.
    """
    builder = WorkflowBuilder(
        name=params.string("workflow_name", default="Workflow"),
        created_by=params.string("workflow_created",
                                 default="Default Constructor"),
        workflow_directory=params.creatable_directory("workflow_directory"),
        default_site=params.string("site"),
        conda_script_generator=CondaJobScriptGenerator.from_parameters(params),
        docker_script_generator=DockerJobScriptGenerator.from_parameters(params),
        namespace=params.string("namespace"),
        default_resource_request=ResourceRequest.from_parameters(params),
        data_configuration=params.string("data_configuration",
                                         default="sharedfs"),
        experiment_name=params.string("experiment_name", default=""),
    )
    # Site/property registration reaches into private builder state by design.
    if params.boolean("include_nas", default=True):
        add_local_nas_to_sites(
            builder._sites_catalog, params  # pylint: disable=protected-access
        )
    if params.boolean("include_saga", default=True):
        add_saga_cluster_to_sites(
            builder._sites_catalog, params  # pylint: disable=protected-access
        )
        configure_saga_properities(
            builder._properties, params  # pylint: disable=protected-access
        )
    return builder
def create_logger(params: Parameters) -> "LearningProgressHtmlLogger":
    """
    Create a LearningProgressHtmlLogger writing to
    ``experiment_group_dir / experiment / index.html``.

    Writes the HTML page skeleton (CSS, title, toggle script) before returning
    the logger.
    """
    output_dir = params.creatable_directory("experiment_group_dir")
    experiment_name = params.string("experiment")
    include_links_to_images = params.optional_boolean("include_image_links")
    num_pretty_descriptions = params.positive_integer(
        "num_pretty_descriptions", default=3
    )
    sort_by_length = params.boolean(
        "sort_learner_descriptions_by_length", default=False
    )

    logging_dir = output_dir / experiment_name
    logging_dir.mkdir(parents=True, exist_ok=True)
    output_html_path = str(logging_dir / "index.html")

    if include_links_to_images is None:
        include_links_to_images = False

    logging.info("Experiment will be logged to %s", output_html_path)

    # Fix: write with an explicit encoding so the generated HTML does not
    # depend on the platform's default locale encoding.
    with open(output_html_path, "w", encoding="utf-8") as outfile:
        html_dumper = CurriculumToHtmlDumper()
        outfile.write(f"<head>\n\t<style>{CSS}\n\t</style>\n</head>")
        outfile.write(f"\n<body>\n\t<h1>{experiment_name}</h1>")
        # A JavaScript function to allow toggling perception information
        outfile.write(
            """
            <script>
            function myFunction(id) {
                var x = document.getElementById(id);
                if (x.style.display === "none") {
                    x.style.display = "block";
                } else {
                    x.style.display = "none";
                }
            }
            </script>
            """
        )
    return LearningProgressHtmlLogger(
        outfile_dir=output_html_path,
        html_dumper=html_dumper,
        include_links_to_images=include_links_to_images,
        num_pretty_descriptions=num_pretty_descriptions,
        sort_by_length=sort_by_length,
    )
def log_experiment_entry_point(params: Parameters) -> None:
    """
    Entry point: build the curriculum, learner, and observers from *params*
    and run the experiment with HTML progress logging.
    """
    experiment_name = params.string("experiment")
    debug_log_dir = params.optional_creatable_directory("debug_log_directory")

    graph_logger: Optional[HypothesisLogger]
    if debug_log_dir:
        logging.info("Debug graphs will be written to %s", debug_log_dir)
        graph_logger = HypothesisLogger(debug_log_dir, enable_graph_rendering=True)
    else:
        graph_logger = None

    logger = LearningProgressHtmlLogger.create_logger(params)

    language_mode = params.enum(
        "language_mode", LanguageMode, default=LanguageMode.ENGLISH
    )

    (training_instance_groups,
     test_instance_groups) = curriculum_from_params(params, language_mode)

    experiment = Experiment(
        name=experiment_name,
        training_stages=training_instance_groups,
        learner_factory=learner_factory_from_params(
            params, graph_logger, language_mode),
        pre_example_training_observers=[
            logger.pre_observer(),
            CandidateAccuracyObserver("pre-acc-observer"),
        ],
        post_example_training_observers=[logger.post_observer()],
        test_instance_groups=test_instance_groups,
        test_observers=[logger.test_observer()],
        sequence_chooser=RandomChooser.for_seed(0),
    )
    execute_experiment(
        experiment,
        log_path=params.optional_creatable_directory("hypothesis_log_dir"),
        log_hypotheses_every_n_examples=params.integer(
            "log_hypothesis_every_n_steps", default=250),
        log_learner_state=params.boolean("log_learner_state", default=True),
        learner_logging_path=params.optional_creatable_directory(
            "experiment_group_dir"),
        starting_point=params.integer("starting_point", default=-1),
        point_to_log=params.integer("point_to_log", default=0),
        load_learner_state=params.optional_existing_file("learner_state_path"),
    )
def main(params: Parameters):
    """
    Score each tab-separated sentence in ``sentences_file`` with the LM1B
    language model and write ``log_probability<TAB>original columns`` rows to
    ``output_file``, reporting throughput at the end.
    """
    graph_def_file = params.existing_file("graph_def_file")
    checkpoint_glob = params.string("checkpoint_glob")
    vocab_file = params.existing_file("vocab_file")
    sentences_file = params.existing_file("sentences_file")
    output_file = params.creatable_file("output_file")
    do_profiling = params.optional_boolean_with_default("profile", False)

    with tensorflow.contrib.tfprof.ProfileContext(
            os.getcwd(),
            trace_steps=range(2, 10),
            dump_steps=range(1, 10, 2),
            # Fix: this previously re-read the "profile" param instead of
            # using the do_profiling local (which was computed then unused).
            enabled=do_profiling):
        lm = LM1B.load(graph_def_file=graph_def_file,
                       checkpoint_file=checkpoint_glob,
                       vocab=vocab_file)

        start_time = None
        num_sentences_processed = 0
        num_tokens_processed = 0
        with open(sentences_file, 'r', newline='') as inp:
            csv_input = csv.reader(inp, delimiter='\t')
            with open(output_file, 'w', newline='') as out:
                csv_output = csv.writer(out, delimiter='\t')
                for line in csv_input:
                    tokens = line[0].split(' ')
                    output_row = list(line)
                    output_row.insert(0, lm.log_probability_of_sentence(tokens))
                    csv_output.writerow(output_row)
                    # we delay till after the first sentence to avoid counting
                    # model startup time; the first sentence's tokens are also
                    # excluded so the tokens/second figure is consistent.
                    if num_sentences_processed == 0:
                        start_time = time.time()
                    else:
                        num_tokens_processed += len(tokens)
                    num_sentences_processed += 1

        # Fix: the old report printed the token count as a sentence count and
        # crashed (start_time None / zero division) on empty or tiny inputs.
        if start_time is not None and num_sentences_processed > 1:
            elapsed_time = time.time() - start_time
            print(f"Processed {num_sentences_processed - 1} sentences in {elapsed_time} "
                  f"seconds, {num_tokens_processed / elapsed_time} tokens per second. First sentence not "
                  f"included in time calculation.")
        else:
            print("Processed too few sentences to estimate throughput.")
def main(cluster_params: Parameters, job_param_file: Path) -> None:
    """
    Submit the entry point described by *job_param_file* to SLURM using the
    cluster configuration in *cluster_params*.
    """
    runner = SlurmPythonRunner.from_parameters(cluster_params)
    job_params = YAMLParametersLoader().load(job_param_file)
    entry_point = job_params.string("entry_point")
    memory = MemoryAmount.parse(job_params.string("memory"))
    runner.run_entry_point(
        entry_point_name=entry_point,
        param_file=job_param_file,
        partition=cluster_params.string("partition"),
        working_directory=(
            job_params.optional_creatable_directory("working_directory")
            or Path(os.getcwd())),
        num_gpus=job_params.integer(
            "num_gpus", default=0, valid_range=Range.at_least(0)),
        num_cpus=job_params.integer(
            "num_cpus", default=1, valid_range=Range.at_least(1)),
        job_name=job_params.string("job_name", default=entry_point),
        memory_request=memory,
        echo_template=cluster_params.boolean("echo_template", default=False),
        slurm_script_path=job_params.optional_creatable_file("slurm_script_path"),
    )
def curriculum_from_params(params: Parameters,
                           language_mode: LanguageMode = LanguageMode.ENGLISH):
    """
    Instantiate the train/test curricula named by the "curriculum" parameter.

    Returns a tuple of (training instance groups, test instance groups); the
    second element is [] when the chosen curriculum has no test builder.
    """
    # Maps curriculum name -> (train builder, optional test builder).
    str_to_train_test_curriculum: Mapping[str, Tuple[
        CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]] = {
            "m6-deniz": (make_m6_curriculum, None),
            "each-object-by-itself": (
                build_each_object_by_itself_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "pursuit": (
                build_pursuit_curriculum,
                build_each_object_by_itself_curriculum_test,
            ),
            "m6-preposition": (build_m6_prepositions_curriculum, None),
            "m9-objects": (build_gaila_phase1_object_curriculum, None),
            "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
            "chinese-classifiers": (build_classifier_curriculum, None),
            "m9-relations": (build_gaila_phase1_relation_curriculum, None),
            "m9-events": (build_gaila_phase1_verb_curriculum, None),
            "m9-debug": (build_debug_curriculum_train, build_debug_curriculum_test),
            "m9-complete": (build_gaila_phase_1_curriculum, None),
            "m13-imprecise-size": (make_imprecise_size_curriculum, None),
            "m13-imprecise-temporal": (make_imprecise_temporal_descriptions, None),
            "m13-subtle-verb-distinction": (make_subtle_verb_distinctions_curriculum, None),
            "m13-object-restrictions": (build_functionally_defined_objects_curriculum, None),
            "m13-functionally-defined-objects": (
                build_functionally_defined_objects_train_curriculum,
                build_functionally_defined_objects_curriculum,
            ),
            "m13-generics": (build_generics_curriculum, None),
            "m13-complete": (build_gaila_m13_curriculum, None),
            "m13-verbs-with-dynamic-prepositions": (
                make_verb_with_dynamic_prepositions_curriculum,
                None,
            ),
            "m13-shuffled": (build_m13_shuffled_curriculum, build_gaila_m13_curriculum),
            "m13-relations": (make_prepositions_curriculum, None),
            "actions-and-generics-curriculum": (build_actions_and_generics_curriculum, None),
            "m15-object-noise-experiments": (
                build_object_learner_experiment_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "m18-integrated-learners-experiment": (
                integrated_pursuit_learner_experiment_curriculum,
                integrated_pursuit_learner_experiment_test,
            ),
        }
    curriculum_name = params.string("curriculum",
                                    str_to_train_test_curriculum.keys())
    # The m18 experiment has its own language generator; all other curricula
    # use the phase-2 generator.
    language_generator = (
        integrated_experiment_language_generator(language_mode)
        if curriculum_name == "m18-integrated-learners-experiment" else
        phase2_language_generator(language_mode))

    if params.has_namespace("pursuit-curriculum-params"):
        pursuit_curriculum_params = params.namespace(
            "pursuit-curriculum-params")
    else:
        pursuit_curriculum_params = Parameters.empty()

    use_path_instead_of_goal = params.boolean("use-path-instead-of-goal",
                                              default=False)

    (training_instance_groups,
     test_instance_groups) = str_to_train_test_curriculum[curriculum_name]

    num_samples = params.optional_positive_integer("num_samples")
    # We need to be able to accept 0 as the number of noise objects but
    # optional_integer doesn't currently support specifying a range of
    # acceptable values: https://github.com/isi-vista/vistautils/issues/142
    num_noise_objects = params.optional_integer("num_noise_objects")

    # The pursuit curriculum takes its extra parameter namespace.
    if curriculum_name == "pursuit":
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                pursuit_curriculum_params=pursuit_curriculum_params,
            ),
            test_instance_groups(num_samples, num_noise_objects,
                                 language_generator)
            if test_instance_groups else [],
        )
    # optional argument to use path instead of goal (only supported by the
    # listed m13 curricula)
    elif use_path_instead_of_goal and curriculum_name in [
            "m13-complete",
            "m13-shuffled",
            "m13-verbs-with-dynamic-prepositions",
    ]:
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                use_path_instead_of_goal,
            ),
            test_instance_groups(num_samples, num_noise_objects,
                                 language_generator)
            if test_instance_groups else [],
        )
    # These experiment curricula take per-phase parameter namespaces; note the
    # test curriculum sizes are hard-coded (5 samples, 0 noise objects).
    elif curriculum_name in (
            "m15-object-noise-experiments",
            "m18-integrated-learners-experiment",
    ):
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                params=params.namespace_or_empty("train_curriculum"),
            ),
            test_instance_groups(
                5,
                0,
                language_generator,
                params=params.namespace_or_empty("test_curriculum"),
            ) if test_instance_groups else [],
        )
    return (
        training_instance_groups(num_samples, num_noise_objects,
                                 language_generator),
        test_instance_groups(num_samples, num_noise_objects,
                             language_generator)
        if test_instance_groups else [],
    )
def main(
    params: Parameters,
    scenes_iterable_input: Optional[Iterable[Phase1InstanceGroup]] = None,
    output_directory: Optional[Path] = None,
    visualizer: Optional[SituationVisualizer] = None,
) -> None:
    """
    Render curriculum scenes with the situation visualizer.

    Iterates over the scenes (either *scenes_iterable_input* or a default
    curriculum), solves object positions iteratively, and either saves
    renderings automatically or steps through scenes interactively.
    """
    language_mode = params.enum("language_mode",
                                LanguageMode,
                                default=LanguageMode.ENGLISH)
    if scenes_iterable_input is None:
        scenes_iterable: Iterable[Phase1InstanceGroup] = [
            make_curriculum(None, None, phase2_language_generator(language_mode))
        ]
    else:
        scenes_iterable = scenes_iterable_input

    num_iterations = params.positive_integer("iterations")
    steps_before_vis = params.positive_integer("steps_before_vis")

    specific_scene = params.optional_positive_integer("scene")

    automatically_save_renderings = params.boolean(
        "automatically_save_renderings", default=False)

    if "experiment_group_dir" in params:
        rendering_filename_generator = from_experiment_filename_generator
    else:
        rendering_filename_generator = default_filename_generator

    screenshot_dir = output_directory

    # Seed both RNGs so scene generation/positioning is reproducible.
    random.seed(params.integer("seed"))
    np.random.seed(params.integer("seed"))

    if params.string("debug_bounding_boxes", default="off") == "on":
        debug_bounding_boxes = True
    else:
        debug_bounding_boxes = False

    if params.string("gaze_arrows", default="off") == "on":
        gaze_arrows = True
    else:
        gaze_arrows = False

    # go through curriculum scenes and output geometry types
    if visualizer is None:
        viz = SituationVisualizer()
    else:
        viz = visualizer
        viz.clear_scene()
    model_scales = viz.get_model_scales()
    # Apply per-object scale multipliers on top of the base model scales.
    for object_type, multiplier in OBJECT_SCALE_MULTIPLIER_MAP.items():
        if object_type in model_scales:
            v3 = model_scales[object_type]
            new_v3 = (v3[0] * multiplier, v3[1] * multiplier, v3[2] * multiplier)
            model_scales[object_type] = new_v3
        else:
            model_scales[object_type] = (multiplier, multiplier, multiplier)

    for model_name, scale in model_scales.items():
        logging.info("SCALE: %s -> %s", model_name, scale.__str__())

    # used to start a frame from where the previous one left off
    previous_model_positions: Optional[PositionsMap] = None

    for scene_number, scene_elements in enumerate(
            SceneCreator.create_scenes(scenes_iterable)):
        # If a scene number is provided in the params file, only render that scene
        if specific_scene and scene_number < specific_scene:
            continue
        if specific_scene and scene_number > specific_scene:
            break

        scene_filename = rendering_filename_generator(scene_number,
                                                      scene_elements)
        if scene_filename in _FILENAMES_USED:
            continue
        _FILENAMES_USED.add(scene_filename)

        print(f"SCENE {scene_number}")
        viz.set_title(" ".join(token for token in scene_elements.tokens) +
                      " (" + str(scene_elements.current_frame + 1) + "/" +
                      str(scene_elements.total_frames) + ")")

        # if this is a new scene, forget the positions from the last scene
        if scene_elements.current_frame == 0:
            previous_model_positions = None

        if automatically_save_renderings:
            # if in auto mode and scene contains an excluded vocab word, skip it
            skip_scene = False
            for token in scene_elements.tokens:
                if token in EXCLUDED_VOCAB:
                    skip_scene = True
            if skip_scene:
                continue

        # for debugging purposes:
        # SceneCreator.graph_for_each(scene_elements.object_graph, print_obj_names)

        # bind visualizer and properties to top level rendering function:
        bound_render_obj = partial(render_obj, viz,
                                   scene_elements.property_map,
                                   previous_model_positions)
        # bind visualizer and properties to nested obj rendering function
        bound_render_nested_obj = partial(render_obj_nested, viz,
                                          scene_elements.property_map,
                                          previous_model_positions)

        # render each object in graph
        SceneCreator.graph_for_each_top_level(scene_elements.object_graph,
                                              bound_render_obj,
                                              bound_render_nested_obj)

        # apply scale to top level nodes in scene
        for node in scene_elements.object_graph:
            if (node.name not in OBJECT_NAMES_TO_EXCLUDE
                    and node.name.split("_")[0] in OBJECT_SCALE_MULTIPLIER_MAP):
                viz.multiply_scale(
                    node.name,
                    OBJECT_SCALE_MULTIPLIER_MAP[node.name.split("_")[0]])

        # find the Region relations that refer to separate objects:
        # (e.g. the cookie is in the region of the hand (of the person), not
        # the leg-segment in the region of the torso).
        inter_object_in_region_map: DefaultDict[
            ObjectPerception, List[Region[ObjectPerception]]] = defaultdict(list)
        for top_level_node in scene_elements.object_graph:
            if top_level_node.perceived_obj in scene_elements.in_region_map:
                inter_object_in_region_map[
                    top_level_node.
                    perceived_obj] = scene_elements.in_region_map[
                        top_level_node.perceived_obj]
        # print(inter_object_in_region_map)

        # we want to assemble a lookup of the offsets (position) of each
        # object's subobjects.
        sub_object_offsets = {}

        for node_name, node in viz.geo_nodes.items():
            child_node_to_offset = {}

            # Breadth-first walk of the scene graph under this geometry node.
            recurse_list: List[NodePath] = node.children
            while recurse_list:
                next_batch: List[NodePath] = []
                for child in recurse_list:
                    next_batch += child.children
                    # make sure this is a sub-object
                    if child.hasMat() and child.parent.name != node_name:
                        # child has non-identity transformation matrix applied
                        # to it (transform differs from parent)
                        # TODO: we could re-export all of the models in such a
                        # way to eliminate this extra layer in the scene graph
                        child_node_to_offset[
                            child.parent.name] = child.get_pos()
                recurse_list = next_batch

            sub_object_offsets[node_name] = child_node_to_offset

        # handle skipping scene (interactive mode only)
        if not automatically_save_renderings:
            viz.run_for_seconds(1)
            skip_command = input("type 's' and hit ENTER to skip this scene")
            if skip_command == "s":
                viz.clear_scene()
                viz.run_for_seconds(0.25)
                continue

        handle_to_in_region_map = {
            object_perception.debug_handle: region_list
            for object_perception, region_list in
            inter_object_in_region_map.items()
        }

        frozen_objects = objects_to_freeze(
            handle_to_in_region_map,
            scene_elements.situation,
            scene_elements.situation_object_to_handle,
        )

        if scene_elements.interpolated_scene_moving_items:
            # freeze everything not included in the interpolated scene
            frozen_objects = (immutableset([
                key.debug_handle
                for key in scene_elements.in_region_map.keys()
            ]) - scene_elements.interpolated_scene_moving_items)

        # now that every object has been instantiated into the scene,
        # they need to be re-positioned.
        repositioned_map = None
        for repositioned_map in _solve_top_level_positions(
                top_level_objects=immutableset([
                    node.perceived_obj
                    for node in scene_elements.object_graph
                    if node.name not in OBJECT_NAMES_TO_EXCLUDE
                ]),
                sub_object_offsets=sub_object_offsets,
                in_region_map=inter_object_in_region_map,
                model_scales=model_scales,
                frozen_objects=frozen_objects,
                iterations=num_iterations,
                yield_steps=steps_before_vis,
                previous_positions=previous_model_positions,
        ):
            viz.clear_debug_nodes()
            viz.clear_gaze_arrows()
            if not automatically_save_renderings:
                viz.run_for_seconds(0.25)

            viz.set_positions(repositioned_map)

            if debug_bounding_boxes:
                for name in repositioned_map.name_to_position:
                    viz.add_debug_bounding_box(
                        name,
                        repositioned_map.name_to_position[name],
                        repositioned_map.name_to_scale[name],
                    )

            if gaze_arrows:
                for handle, props in scene_elements.property_map.items():
                    for prop in props:
                        if isinstance(
                                prop,
                                OntologyNode) and prop.handle == "gazed-at":
                            viz.add_gaze_arrow(
                                handle,
                                repositioned_map.name_to_position[handle],
                                repositioned_map.name_to_scale[handle],
                            )
            # the visualizer seems to need about a second to render an update
            if not automatically_save_renderings:
                viz.run_for_seconds(1)
            # viz.print_scene_graph()
            previous_model_positions = None

        # only store previous positions when continuing to next frame / scene
        previous_model_positions = repositioned_map
        viz.run_for_seconds(1)

        screenshot(
            automatically_save_renderings=automatically_save_renderings,
            filename=scene_filename,
            screenshot_dir=screenshot_dir,
            viz=viz,
        )

        viz.clear_scene()
        viz.run_for_seconds(0.25)
def sample_main(params: Parameters):
    """
    Smoke-test entry point: verifies that original, CLI, and overridden
    parameters (including a nested namespace) resolved as expected.
    """
    expected_top_level = (
        ("only_original", "foo"),
        ("only_cli", "bar"),
        ("overridden", "hello"),
    )
    for key, expected_value in expected_top_level:
        assert params.string(key) == expected_value
    assert params.namespace("nested").string(
        "overridden") == "I've been overridden"
def learner_factory_from_params(
    params: Parameters,
    graph_logger: Optional[HypothesisLogger],
    language_mode: LanguageMode = LanguageMode.ENGLISH,
) -> Callable[[], TopLevelLanguageLearner]:  # type: ignore
    """
    Return a zero-argument factory producing the learner named by the
    "learner" parameter.

    Raises RuntimeError when Chinese is requested with a non-integrated
    learner (only the integrated learners support Chinese).
    """
    learner_type = params.string(
        "learner",
        [
            "pursuit",
            "object-subset",
            "preposition-subset",
            "attribute-subset",
            "verb-subset",
            "integrated-learner",
            "integrated-learner-recognizer",
            "pursuit-gaze",
        ],
    )
    beam_size = params.positive_integer("beam_size", default=10)

    if language_mode == LanguageMode.CHINESE and learner_type not in [
            "integrated-learner",
            "integrated-learner-recognizer",
    ]:
        raise RuntimeError(
            "Only able to test Chinese with integrated learner.")

    # Fixed seed so learner construction is deterministic.
    rng = random.Random()
    rng.seed(0)
    perception_generator = GAILA_PHASE_1_PERCEPTION_GENERATOR

    objects = [YOU_HACK, ME_HACK]
    objects.extend(PHASE_1_CURRICULUM_OBJECTS)

    # Eval hack! This is specific to the Phase 1 ontology
    object_recognizer = ObjectRecognizer.for_ontology_types(
        objects,
        determiners=ENGLISH_DETERMINERS,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        perception_generator=perception_generator,
    )

    if learner_type == "pursuit":
        return lambda: ObjectPursuitLearner.from_parameters(
            params.namespace("pursuit"), graph_logger=graph_logger)
    elif learner_type == "pursuit-gaze":
        # Pursuit object learner (with gaze ranking) plus subset learners for
        # attributes, relations, and actions.
        return lambda: IntegratedTemplateLearner(
            object_learner=PursuitObjectLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                ontology=GAILA_PHASE_2_ONTOLOGY,
                language_mode=language_mode,
                rank_gaze_higher=True,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
        )
    elif learner_type == "object-subset":
        return lambda: SubsetObjectLearner(ontology=GAILA_PHASE_1_ONTOLOGY,
                                           language_mode=LanguageMode.ENGLISH)
    elif learner_type == "attribute-subset":
        return lambda: SubsetAttributeLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "preposition-subset":
        return lambda: SubsetPrepositionLearner(
            # graph_logger=graph_logger,
            object_recognizer=object_recognizer,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "verb-subset":
        return lambda: SubsetVerbLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "integrated-learner":
        # Subset learners for every template type plus a functional learner.
        return lambda: IntegratedTemplateLearner(
            object_learner=SubsetObjectLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-learner-recognizer":
        # Like "integrated-learner" but objects come from the fixed recognizer
        # rather than being learned.
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    else:
        # Unreachable: params.string above restricts learner_type to the
        # handled options.
        raise RuntimeError("can't happen")
def curriculum_from_params(
    params: Parameters, language_mode: LanguageMode = LanguageMode.ENGLISH
):
    """
    Build the (train, test) instance groups named by the ``curriculum`` parameter.

    Each registry entry maps a curriculum name to a pair of
    (train builder, optional test builder).  When no test builder is registered,
    the test portion of the returned pair is an empty list.
    """
    str_to_train_test_curriculum: Mapping[
        str, Tuple[CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]
    ] = {
        "m6-deniz": (make_m6_curriculum, None),
        "each-object-by-itself": (
            build_each_object_by_itself_curriculum_train,
            build_each_object_by_itself_curriculum_test,
        ),
        "pursuit": (
            build_pursuit_curriculum,
            build_each_object_by_itself_curriculum_test,
        ),
        "m6-preposition": (build_m6_prepositions_curriculum, None),
        "m9-objects": (build_gaila_phase1_object_curriculum, None),
        "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
        "m9-relations": (build_gaila_phase1_relation_curriculum, None),
        "m9-events": (build_gaila_phase1_verb_curriculum, None),
        "m9-debug": (build_debug_curriculum_train, build_debug_curriculum_test),
        "m9-complete": (build_gaila_phase_1_curriculum, None),
        "m13-imprecise-size": (make_imprecise_size_curriculum, None),
        "m13-imprecise-temporal": (make_imprecise_temporal_descriptions, None),
        "m13-subtle-verb-distinction": (make_subtle_verb_distinctions_curriculum, None),
        "m13-object-restrictions": (build_functionally_defined_objects_curriculum, None),
        "m13-functionally-defined-objects": (
            build_functionally_defined_objects_train_curriculum,
            build_functionally_defined_objects_curriculum,
        ),
        "m13-generics": (build_generics_curriculum, None),
        "m13-complete": (build_gaila_m13_curriculum, None),
        "m13-verbs-with-dynamic-prepositions": (
            make_verb_with_dynamic_prepositions_curriculum,
            None,
        ),
        "m13-shuffled": (build_m13_shuffled_curriculum, build_gaila_m13_curriculum),
        "m13-relations": (make_prepositions_curriculum, None),
    }

    curriculum_name = params.string("curriculum", str_to_train_test_curriculum.keys())
    language_generator = phase2_language_generator(language_mode)

    # Only the "pursuit" curriculum consumes this namespace; fall back to an
    # empty parameter set when it is absent.
    pursuit_curriculum_params = (
        params.namespace("pursuit-curriculum-params")
        if params.has_namespace("pursuit-curriculum-params")
        else Parameters.empty()
    )

    train_builder, test_builder = str_to_train_test_curriculum[curriculum_name]
    num_samples = params.optional_positive_integer("num_samples")
    num_noise_objects = params.optional_positive_integer("num_noise_objects")

    if curriculum_name == "pursuit":
        train_instance_groups = train_builder(
            num_samples,
            num_noise_objects,
            language_generator,
            pursuit_curriculum_params=pursuit_curriculum_params,
        )
    else:
        train_instance_groups = train_builder(
            num_samples, num_noise_objects, language_generator
        )

    test_instance_groups = (
        test_builder(num_samples, num_noise_objects, language_generator)
        if test_builder
        else []
    )
    return (train_instance_groups, test_instance_groups)
def example_workflow(params: Parameters):  # pragma: no cover
    """
    An example script to generate a container workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")
    docker_tar = params.creatable_file("docker_tar")
    docker_build_dir = params.existing_directory("docker_build_dir")
    docker_image_name = params.string(
        "docker_image_name", default="pegasus_wrapper_container_demo"
    )
    docker_image_tag = params.string("docker_image_tag", default="0.2")
    mongo_db_tar = params.string(
        "mongo_db_tar", default="/nas/gaia/shared/cluster/docker/mongo-4.4.tar"
    )
    # Host-side directories bind-mounted into the MongoDB service container.
    # (Local name fixed from the original misspelling `monogo_db_data`.)
    mongo_db_data = "/scratch/dockermount/pegasus_wrapper_tmp/data"
    mongo_db_config = "/scratch/dockermount/pegasus_wrapper_tmp/config"

    # Generating parameters for initializing a workflow
    # We recommend making workflow directory, site, and partition parameters
    # in an research workflow
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "home_dir": str(tmp_path),
            "partition": "scavenge",
        }
    )

    saga31_request = SlurmResourceRequest.from_parameters(
        Parameters.from_mapping({"run_on_single_node": "saga31", "partition": "gaia"})
    )

    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    input_file = tmp_path / "raw_nums.txt"
    add_y_output_file_nas = tmp_path / "nums_y.txt"
    sorted_output_file_nas = tmp_path / "sorted.txt"

    rng = Random()
    rng.seed(0)
    nums = [int(rng.random() * 100) for _ in range(0, 25)]

    # Base Job Locator
    job_locator = Locator(("jobs",))
    docker_python_root = Path("/home/app/")

    job_profile = PegasusProfile(
        namespace="pegasus", key="transfer.bypass.input.staging", value="True"
    )

    # Write a list of numbers out to be able to run the workflow
    with input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    build_container = run_bash(
        job_locator / "build_docker",
        command=[
            "mkdir -p /scratch/dockermount/pegasus_wrapper_tmp",
            f"cd {docker_build_dir}",
            f"docker build . -t {docker_image_name}:{docker_image_tag}",
            f"docker save -o /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_image_name}:{docker_image_tag}",
            f"cp /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_tar.absolute()}",
            f"chmod go+r {docker_tar.absolute()}",
            f"docker load --input {mongo_db_tar}",
            f"mkdir -p {mongo_db_data}",
            f"mkdir -p {mongo_db_config}",
        ],
        depends_on=[],
        resource_request=saga31_request,
    )

    python36 = add_container(
        f"{docker_image_name}:{docker_image_tag}",
        "docker",
        str(docker_tar.absolute()),
        image_site="saga",
        bypass_staging=True,
    )
    mongo4_4 = add_container(
        "mongo:4.4", "docker", mongo_db_tar, image_site="saga", bypass_staging=True
    )

    start_mongo = start_docker_as_service(
        mongo4_4,
        depends_on=[build_container],
        # BUG FIX: the config mount was missing the ":" separating the host
        # path from the container path ("{mongo_db_config}/etc/custom"),
        # producing a single path rather than a host:container bind mount.
        mounts=[f"{mongo_db_data}:/data/db", f"{mongo_db_config}:/etc/custom"],
        docker_args="-p 27017:27017",
        resource_request=saga31_request,
    )

    add_y_job = run_python_on_args(
        job_locator / "add",
        docker_python_root / "add_y.py",
        set_args=f"{input_file} {add_y_output_file_nas} --y 10",
        depends_on=[build_container],
        job_profiles=[job_profile],
        resource_request=saga31_request,
        container=python36,
        input_file_paths=[input_file],
        output_file_paths=[add_y_output_file_nas],
    )

    sort_job = run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
        depends_on=[add_y_job],
        container=python36,
        job_profiles=[job_profile],
        resource_request=saga31_request,
        # Pass lists for consistency with the run_python_on_args call above.
        input_file_paths=[add_y_output_file_nas],
        output_file_paths=[sorted_output_file_nas],
    )

    _ = stop_docker_as_service(
        mongo4_4, depends_on=[start_mongo, sort_job], resource_request=saga31_request
    )

    # Generate the Pegasus DAX file & a Submit Script
    write_workflow_description(tmp_path)
def integrated_experiment_entry_point(params: Parameters) -> None:
    """
    Build the Pegasus workflow for the integrated-learners experiment.

    First schedules one curriculum-generation job per (noise, shuffle)
    combination, then one experiment job per generated curriculum, and finally
    writes the workflow description.
    """
    initialize_vista_pegasus_wrapper(params)

    baseline_parameters = params.namespace("integrated_learners_experiment")
    pursuit_resource_request_params = params.namespace("pursuit_resource_request")

    # This code is commented out but may be used in the near future to add language ablation
    # Capabilities to this curriculum.

    # get the minimum and maximum accuracy of the language with the situation
    # min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    # max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    # num_language_accuracy_increment = params.integer(
    #     "num_language_accuracy_increment", default=5
    # )
    # values_for_accuracy = np.linspace(
    #     min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    # )

    # Get if attributes or relations should be included
    include_attributes = params.boolean("include_attributes", default=True)
    include_relations = params.boolean("include_relations", default=True)

    limit_jobs_for_category(
        "pursuit_job_limit", params.integer("num_pursuit_learners_active", default=8)
    )

    curriculum_repository_path = params.creatable_directory("curriculum_repository_path")

    # Job to build desired curriculum(s) which our learners use
    curriculum_dependencies = immutableset(
        (
            CURRICULUM_NAME_FORMAT.format(
                noise=add_noise,
                shuffled=shuffle,
                relations=include_relations,
                attributes=include_attributes,
            ),
            run_python_on_parameters(
                Locator(
                    CURRICULUM_NAME_FORMAT.format(
                        noise=add_noise,
                        shuffled=shuffle,
                        relations=include_relations,
                        attributes=include_attributes,
                    ).split("-")
                ),
                generate_curriculum_script,
                baseline_parameters.unify(
                    {
                        "train_curriculum": Parameters.from_mapping(CURRICULUM_PARAMS)
                        .unify(
                            {
                                "add_noise": add_noise,
                                "shuffled": shuffle,
                                "include_attributes": include_attributes,
                                "include_relations": include_relations,
                            }
                        )
                        .as_mapping()
                    }
                )
                .unify(FIXED_PARAMETERS)
                .unify({"curriculum_repository_path": curriculum_repository_path}),
                depends_on=[],
            ),
            Parameters.from_mapping(CURRICULUM_PARAMS).unify(
                {
                    "add_noise": add_noise,
                    "shuffled": shuffle,
                    "include_attributes": include_attributes,
                    "include_relations": include_relations,
                }
            ),
        )
        for add_noise in (True, False)
        for shuffle in (True, False)
    )

    # The learner types do not vary per curriculum, so read them once rather
    # than once per loop iteration.
    object_learner_type = params.string(
        "object_learner.learner_type",
        valid_options=["pursuit", "subset", "pbv"],
        default="pursuit",
    )
    attribute_learner_type = params.string(
        # BUG FIX: this previously read "attribute_learner.learner__type"
        # (double underscore), so a configured attribute learner type was
        # silently ignored and the default always used.
        "attribute_learner.learner_type",
        valid_options=["none", "pursuit", "subset"],
        default="pursuit",
    )
    relation_learner_type = params.string(
        "relation_learner.learner_type",
        valid_options=["none", "pursuit", "subset"],
        default="pursuit",
    )
    # Pursuit jobs get a dedicated resource request and job category.
    uses_pursuit = "pursuit" in (
        object_learner_type,
        attribute_learner_type,
        relation_learner_type,
    )

    # jobs to build experiment
    for (curriculum_str, curriculum_dep, curr_params) in curriculum_dependencies:
        experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
            curriculum_name=curriculum_str.replace("-", "+"),
            object_learner=object_learner_type,
            attribute_learner=attribute_learner_type,
            relation_learner=relation_learner_type,
        )
        experiment_name = Locator(experiment_name_string.split("-"))

        # Note that the input parameters should include the root params and
        # anything else we want.
        experiment_params = baseline_parameters.unify(FIXED_PARAMETERS).unify(
            {
                "experiment": experiment_name_string,
                "experiment_group_dir": directory_for(experiment_name),
                "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
                "learner_logging_path": directory_for(experiment_name),
                "log_learner_state": True,
                "resume_from_latest_logged_state": True,
                "load_from_curriculum_repository": curriculum_repository_path,
                "train_curriculum": curr_params,
            }
        )

        run_python_on_parameters(
            experiment_name,
            log_experiment_script,
            experiment_params,
            depends_on=[curriculum_dep],
            resource_request=SlurmResourceRequest.from_parameters(
                pursuit_resource_request_params
            )
            if uses_pursuit
            else None,
            category="pursuit" if uses_pursuit else "subset",
            use_pypy=True,
        )

    write_workflow_description()
def _default_observers(logger, params: Parameters, experiment_group_dir):
    """Build the standard (pre, post, test) observer lists from the logger."""
    pre_observer = [
        logger.pre_observer(  # type: ignore
            params=params.namespace_or_empty("pre_observer"),
            experiment_group_dir=experiment_group_dir,
        )
    ]
    post_observer = [
        logger.post_observer(  # type: ignore
            params=params.namespace_or_empty("post_observer"),
            experiment_group_dir=experiment_group_dir,
        )
    ]
    test_observer = [
        logger.test_observer(  # type: ignore
            params=params.namespace_or_empty("test_observer"),
            experiment_group_dir=experiment_group_dir,
        )
    ]
    return pre_observer, post_observer, test_observer


def log_experiment_entry_point(params: Parameters) -> None:
    """
    Run a learning experiment configured entirely by *params*.

    Builds (or loads) the curriculum, restores or constructs observers, then
    delegates to ``execute_experiment``.

    Raises:
        RuntimeError: if resume-from-logged-state is combined with an explicit
            observer state path, or if ``experiment_group_dir`` is missing when
            resuming.
    """
    experiment_name = params.string("experiment")
    debug_log_dir = params.optional_creatable_directory("debug_log_directory")

    graph_logger: Optional[HypothesisLogger]
    if debug_log_dir:
        logging.info("Debug graphs will be written to %s", debug_log_dir)
        graph_logger = HypothesisLogger(debug_log_dir, enable_graph_rendering=True)
    else:
        graph_logger = None

    logger = LearningProgressHtmlLogger.create_logger(params)

    language_mode = params.enum(
        "language_mode", LanguageMode, default=LanguageMode.ENGLISH
    )

    curriculum_repository_path = params.optional_existing_directory(
        "load_from_curriculum_repository"
    )
    if curriculum_repository_path:
        curriculum = read_experiment_curriculum(
            curriculum_repository_path, params, language_mode
        )
        (training_instance_groups, test_instance_groups) = (
            curriculum.train_curriculum,
            curriculum.test_curriculum,
        )
    else:
        (training_instance_groups, test_instance_groups) = curriculum_from_params(
            params, language_mode
        )

    experiment_group_dir = params.optional_creatable_directory("experiment_group_dir")

    resume_from_last_logged_state = params.boolean(
        "resume_from_latest_logged_state", default=False
    )

    # Check if we have explicit observer states to load
    observers_state = params.optional_existing_file("observers_state_path")

    test_observer = []  # type: ignore
    pre_observer = []  # type: ignore
    post_observer = []  # type: ignore

    if resume_from_last_logged_state and observers_state:
        raise RuntimeError(
            "Can not resume from last logged state and provide explicit observer state paths"
        )

    if resume_from_last_logged_state:
        if not experiment_group_dir:
            raise RuntimeError(
                "experiment_group_dir must be specified when resume_from_last_logged_state is true."
            )
        # Try to Load Observers: walk saved states most-recent-first and use
        # the first one that loads cleanly, skipping unreadable ones.
        for _, observers_state_path in observer_states_by_most_recent(
            cast(Path, experiment_group_dir) / "observer_state", "observers_state_at_"
        ):
            try:
                with observers_state_path.open("rb") as f:
                    observers_holder = pickle.load(f)
                    pre_observer = observers_holder.pre_observers
                    post_observer = observers_holder.post_observers
                    test_observer = observers_holder.test_observers
                # BUG FIX: stop after the first successful load.  Without this
                # break the loop kept going and older states overwrote the
                # most recent one.
                break
            except OSError:
                logging.warning(
                    "Unable to open observer state at %s; skipping.",
                    str(observers_state_path),
                )
            except pickle.UnpicklingError:
                logging.warning(
                    "Couldn't unpickle observer state at %s; skipping.",
                    str(observers_state_path),
                )
        if not pre_observer and not post_observer and not test_observer:
            logging.warning("Reverting to default observers.")
            pre_observer, post_observer, test_observer = _default_observers(
                logger, params, experiment_group_dir
            )
    elif observers_state:
        try:
            with observers_state.open("rb") as f:
                observers_holder = pickle.load(f)
                pre_observer = observers_holder.pre_observers
                post_observer = observers_holder.post_observers
                test_observer = observers_holder.test_observers
        except OSError:
            logging.warning(
                "Unable to open observer state at %s; skipping.", str(observers_state)
            )
        except pickle.UnpicklingError:
            logging.warning(
                "Couldn't unpickle observer state at %s; skipping.",
                str(observers_state),
            )
    else:
        pre_observer, post_observer, test_observer = _default_observers(
            logger, params, experiment_group_dir
        )

    execute_experiment(
        Experiment(
            name=experiment_name,
            training_stages=training_instance_groups,
            learner_factory=learner_factory_from_params(
                params, graph_logger, language_mode
            ),
            pre_example_training_observers=pre_observer,
            post_example_training_observers=post_observer,
            test_instance_groups=test_instance_groups,
            test_observers=test_observer,
            sequence_chooser=RandomChooser.for_seed(0),
        ),
        log_path=params.optional_creatable_directory("hypothesis_log_dir"),
        log_hypotheses_every_n_examples=params.integer(
            "log_hypothesis_every_n_steps", default=250
        ),
        log_learner_state=params.boolean("log_learner_state", default=True),
        learner_logging_path=experiment_group_dir,
        starting_point=params.integer("starting_point", default=0),
        point_to_log=params.integer("point_to_log", default=0),
        load_learner_state=params.optional_existing_file("learner_state_path"),
        resume_from_latest_logged_state=resume_from_last_logged_state,
        debug_learner_pickling=params.boolean("debug_learner_pickling", default=False),
    )
def build_object_learner_factory(
    params: Parameters, beam_size: int, language_mode: LanguageMode
) -> TemplateLearner:
    """
    Build the object learner selected by ``learner_type``.

    Raises:
        RuntimeError: if ``learner_type`` is not a recognized option.
    """
    learner_type = params.string(
        "learner_type",
        valid_options=["subset", "pbv", "cross-situational", "pursuit", "recognizer"],
        default="subset",
    )
    ontology, objects, perception_gen = ONTOLOGY_STR_TO_ONTOLOGY[
        params.string(
            "ontology", valid_options=ONTOLOGY_STR_TO_ONTOLOGY.keys(), default="phase2"
        )
    ]

    if learner_type == "subset":
        return SubsetObjectLearnerNew(
            ontology=ontology, beam_size=beam_size, language_mode=language_mode
        )
    elif learner_type == "pbv":
        # CONSISTENCY FIX: use `integer` with a default, as the pursuit branch
        # below and the sibling relation/attribute factories do.  The original
        # `optional_integer(..., default=0)` mixed "optional" with a default.
        chooser = RandomChooser.for_seed(params.integer("random_seed", default=0))
        return ProposeButVerifyObjectLearner(
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold", default=0.8
            ),
            rng=chooser,
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "cross-situational":
        return CrossSituationalObjectLearner(
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold"
            ),
            lexicon_entry_threshold=params.floating_point("lexicon_entry_threshold"),
            smoothing_parameter=params.floating_point("smoothing_parameter"),
            expected_number_of_meanings=len(ontology.nodes_with_properties(THING)),
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "pursuit":
        rng = random.Random()
        rng.seed(params.integer("random_seed", default=0))
        return PursuitObjectLearnerNew(
            learning_factor=params.floating_point("learning_factor"),
            graph_match_confirmation_threshold=params.floating_point(
                "graph_match_confirmation_threshold"
            ),
            lexicon_entry_threshold=params.floating_point("lexicon_entry_threshold"),
            rng=rng,
            smoothing_parameter=params.floating_point("smoothing_parameter"),
            ontology=ontology,
            language_mode=language_mode,
        )
    elif learner_type == "recognizer":
        object_recognizer = ObjectRecognizer.for_ontology_types(
            objects,
            determiners=ENGLISH_DETERMINERS,
            ontology=ontology,
            language_mode=language_mode,
            perception_generator=perception_gen,
        )
        return ObjectRecognizerAsTemplateLearner(
            object_recognizer=object_recognizer, language_mode=language_mode
        )
    else:
        raise RuntimeError("Object learner type invalid")
def learner_factory_from_params(
    params: Parameters,
    graph_logger: Optional[HypothesisLogger],
    language_mode: LanguageMode = LanguageMode.ENGLISH,
) -> Callable[[], TopLevelLanguageLearner]:  # type: ignore
    """
    Return a zero-argument factory for the learner selected by ``learner``.

    Raises:
        RuntimeError: if an invalid learner or object-learner type is selected.
    """
    learner_type = params.string(
        "learner",
        [
            "pursuit",
            "object-subset",
            "preposition-subset",
            "attribute-subset",
            "verb-subset",
            "integrated-learner",
            "integrated-learner-recognizer-without-generics",
            "integrated-learner-recognizer",
            "pursuit-gaze",
            "integrated-object-only",
            "integrated-learner-params",
            "integrated-pursuit-attribute-only",
        ],
    )
    beam_size = params.positive_integer("beam_size", default=10)
    rng = random.Random()
    rng.seed(0)
    perception_generator = GAILA_PHASE_1_PERCEPTION_GENERATOR

    objects = [YOU_HACK, ME_HACK]
    objects.extend(PHASE_1_CURRICULUM_OBJECTS)

    # Eval hack! This is specific to the Phase 1 ontology
    object_recognizer = ObjectRecognizer.for_ontology_types(
        objects,
        determiners=ENGLISH_DETERMINERS,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        perception_generator=perception_generator,
    )

    if learner_type == "pursuit":
        return lambda: ObjectPursuitLearner.from_parameters(
            params.namespace("pursuit"), graph_logger=graph_logger
        )
    elif learner_type == "pursuit-gaze":
        return lambda: IntegratedTemplateLearner(
            object_learner=PursuitObjectLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                ontology=GAILA_PHASE_2_ONTOLOGY,
                language_mode=language_mode,
                rank_gaze_higher=True,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
        )
    elif learner_type == "object-subset":
        return lambda: SubsetObjectLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY, language_mode=LanguageMode.ENGLISH
        )
    elif learner_type == "attribute-subset":
        return lambda: SubsetAttributeLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "preposition-subset":
        return lambda: SubsetPrepositionLearner(
            # graph_logger=graph_logger,
            object_recognizer=object_recognizer,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "verb-subset":
        return lambda: SubsetVerbLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "integrated-learner":
        return lambda: IntegratedTemplateLearner(
            object_learner=SubsetObjectLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-learner-recognizer":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer, language_mode=language_mode
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
            generics_learner=SimpleGenericsLearner(),
        )
    # BUG FIX: this branch was labeled "ic", which is not in the valid_options
    # list above, so it was unreachable -- while the advertised option
    # "integrated-learner-recognizer-without-generics" fell through to the
    # "can't happen" error.  The branch body (recognizer-based integrated
    # learner with no generics learner) matches the renamed option.
    elif learner_type == "integrated-learner-recognizer-without-generics":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer, language_mode=language_mode
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-object-only":
        object_learner_type = params.string(
            "object_learner_type",
            valid_options=["subset", "pbv", "pursuit"],
            default="subset",
        )

        if params.has_namespace("learner_params"):
            learner_params = params.namespace("learner_params")
        else:
            learner_params = params.empty(namespace_prefix="learner_params")

        object_learner_factory: Callable[[], TemplateLearner]
        if object_learner_type == "subset":

            def subset_factory() -> SubsetObjectLearnerNew:
                return SubsetObjectLearnerNew(  # type: ignore
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    beam_size=beam_size,
                    language_mode=language_mode,
                )

            object_learner_factory = subset_factory
        elif object_learner_type == "pbv":

            def pbv_factory() -> ProposeButVerifyObjectLearner:
                return ProposeButVerifyObjectLearner.from_params(  # type: ignore
                    learner_params
                )

            object_learner_factory = pbv_factory
        elif object_learner_type == "pursuit":

            def pursuit_factory() -> PursuitObjectLearnerNew:
                return PursuitObjectLearnerNew(  # type: ignore
                    learning_factor=learner_params.floating_point("learning_factor"),
                    graph_match_confirmation_threshold=learner_params.floating_point(
                        "graph_match_confirmation_threshold"
                    ),
                    lexicon_entry_threshold=learner_params.floating_point(
                        "lexicon_entry_threshold"
                    ),
                    rng=rng,
                    smoothing_parameter=learner_params.floating_point(
                        "smoothing_parameter"
                    ),
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    language_mode=language_mode,
                )

            object_learner_factory = pursuit_factory
        else:
            # BUG FIX: the message previously interpolated `learner_type`
            # (always "integrated-object-only" here) instead of the offending
            # object learner type.
            raise RuntimeError(
                f"Invalid Object Learner Type Selected: {object_learner_type}"
            )
        return lambda: IntegratedTemplateLearner(
            object_learner=object_learner_factory()
        )
    elif learner_type == "integrated-learner-params":
        object_learner = build_object_learner_factory(  # type:ignore
            params.namespace_or_empty("object_learner"), beam_size, language_mode
        )
        attribute_learner = build_attribute_learner_factory(  # type:ignore
            params.namespace_or_empty("attribute_learner"), beam_size, language_mode
        )
        relation_learner = build_relation_learner_factory(  # type:ignore
            params.namespace_or_empty("relation_learner"), beam_size, language_mode
        )
        action_learner = build_action_learner_factory(  # type:ignore
            params.namespace_or_empty("action_learner"), beam_size, language_mode
        )
        plural_learner = build_plural_learner_factory(  # type:ignore
            params.namespace_or_empty("plural_learner"), beam_size, language_mode
        )
        return lambda: IntegratedTemplateLearner(
            object_learner=object_learner,
            attribute_learner=attribute_learner,
            relation_learner=relation_learner,
            action_learner=action_learner,
            functional_learner=FunctionalLearner(language_mode=language_mode)
            if params.boolean("include_functional_learner", default=True)
            else None,
            generics_learner=SimpleGenericsLearner()
            if params.boolean("include_generics_learner", default=True)
            else None,
            plural_learner=plural_learner,
            suppress_error=params.boolean("suppress_error", default=True),
        )
    elif learner_type == "integrated-pursuit-attribute-only":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer, language_mode=language_mode
            ),
            attribute_learner=PursuitAttributeLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                rank_gaze_higher=False,
                ontology=GAILA_PHASE_1_ONTOLOGY,
                language_mode=language_mode,
            ),
        )
    else:
        raise RuntimeError("can't happen")