def _explicit_split(source: KeyValueSource[str, bytes], params: Parameters):
    """Copy entries of *source* into one zip sink per configured split.

    Each sub-namespace of the explicit-split namespace names a ``keys_file``
    (one key per line) and an ``output_file`` (a creatable zip).  Every key
    listed must exist in *source*; a missing key raises ``RuntimeError``.
    If the ``must_be_exhaustive`` parameter is true (the default), the splits
    together must cover every key of *source*, otherwise ``RuntimeError``
    is raised.
    """
    explicit_split_namespace = params.namespace(_EXPLICIT_SPLIT_PARAM)
    # We track these so we can ensure the split is a complete partition of the input,
    # if the user so desires.
    keys_copied = []
    for split_namespace in explicit_split_namespace.sub_namespaces():
        keys_for_split = file_lines_to_set(
            split_namespace.existing_file("keys_file"))
        with KeyValueSink.zip_bytes_sink(
                split_namespace.creatable_file("output_file")) as split_sink:
            for key in keys_for_split:
                source_value = source.get(key)
                if source_value is not None:
                    split_sink.put(key, source_value)
                    keys_copied.append(key)
                else:
                    error_message = (
                        f"For split specified in {split_namespace.namespace_prefix}, "
                        f"requested key value {key} not found in {source}.")
                    available_keys = source.keys()
                    if available_keys is not None:
                        # Fixed: the two fragments previously joined without a
                        # space ("a fewavailable keys"); also reuse the
                        # already-fetched, null-checked key set rather than
                        # calling source.keys() a second time.
                        error_message = (
                            f"{error_message} Here are a few "
                            f"available keys: {str_list_limited(available_keys, 10)}"
                        )
                    raise RuntimeError(error_message)
    if params.boolean("must_be_exhaustive", default=True):
        keys_not_copied = immutableset(source.keys()) - set(keys_copied)
        if keys_not_copied:
            raise RuntimeError(
                f"Expected the split to be a partition, but "
                f"{len(keys_not_copied)} were not included in any output split, "
                f"including {str_list_limited(keys_not_copied, 10)}. "
                f"If you did not intend the split to be exhaustive, "
                f"please specify set parameter must_be_exhaustive to False")
def gaze_ablation_runner_entry_point(params: Parameters) -> None:
    """Generate and submit one experiment job per gaze-ablation configuration.

    Sweeps the cross-product of curriculum size, scene size, the two gaze
    probabilities, and whether gaze is perceived at all, creating a Pegasus
    job (via ``run_python_on_parameters``) for each combination, then writes
    the workflow description.
    """
    initialize_vista_pegasus_wrapper(params)
    # Get the baseline experiment parameters for gaze ablation -- these are things common to all of
    # the experiments, like:
    #
    # include_image_links: true
    # sort_learner_descriptions_by_length: True
    # num_pretty_descriptions: 5
    baseline_parameters = params.namespace("gaze_ablation")
    # get the minimum and maximum number of objects in a scene
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)
    # this gets the number of different accuracies to try; default = increment by 0.1
    num_accuracy_increments = params.integer("num_increments", default=11)
    values_for_accuracy = np.linspace(0, 1, num_accuracy_increments)
    # the number of noise instances to be included
    min_num_noise_instances = params.integer("min_num_noise", default=0)
    max_num_noise_instances = params.integer("max_num_noise", default=0)
    # get the number of instances in the entire curriculum
    min_num_instances_in_curriculum = params.integer("min_instances", default=10)
    max_num_instances_in_curriculum = params.integer("max_instances", default=20)
    # all possible numbers of noise instances
    for num_noise_instances in range(min_num_noise_instances,
                                     max_num_noise_instances + 1):
        # all possible numbers of instances in the curriculum
        for num_instances in range(min_num_instances_in_curriculum,
                                   max_num_instances_in_curriculum + 1):
            # all possible numbers of objects in an instance
            for num_objects_in_instance in range(min_num_objects,
                                                 max_num_objects + 1):
                # all possible accuracies for the two gaze probabilities
                for prob_given in values_for_accuracy:
                    for prob_not_given in values_for_accuracy:
                        # both ignoring and perceiving gaze
                        for add_gaze in [True, False]:
                            # Define the experiment name, which is used both as a job name and to
                            # choose a directory in which to store the experiment results.
                            experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
                                num_instances=num_instances,
                                num_noise_instances=num_noise_instances,
                                num_objects_in_instance=num_objects_in_instance,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                                add_gaze=add_gaze,
                            )
                            # Locators are built from the "-"-separated pieces
                            # of the experiment name.
                            experiment_name = Locator(
                                experiment_name_string.split("-"))
                            # Note that the input parameters should include the root params and
                            # anything else we want.
                            experiment_params = baseline_parameters.unify(
                                FIXED_PARAMETERS).unify({
                                    "experiment": experiment_name_string,
                                    "experiment_group_dir":
                                        directory_for(experiment_name),
                                    "hypothesis_log_dir":
                                        directory_for(experiment_name) / "hypotheses",
                                    "learner_logging_path":
                                        directory_for(experiment_name),
                                    "log_learner_state": True,
                                    "resume_from_latest_logged_state": True,
                                    # note that np.linspace yields numpy floats,
                                    # hence the float(...) conversions below
                                    "pursuit-curriculum-params": {
                                        "num_instances": num_instances,
                                        "num_noise_instances": num_noise_instances,
                                        "num_objects_in_instance":
                                            num_objects_in_instance,
                                        "add_gaze": add_gaze,
                                        "prob_given": float(prob_given),
                                        "prob_not_given": float(prob_not_given),
                                    },
                                })
                            run_python_on_parameters(
                                experiment_name,
                                log_experiment_script,
                                experiment_params,
                                depends_on=[],
                            )
    # Emit the full Pegasus workflow once every job has been registered.
    write_workflow_description()
def sample_main(params: Parameters):
    """Sanity-check that parameter loading/overriding produced the expected values."""
    expected_top_level = {
        "only_original": "foo",
        "only_cli": "bar",
        "overridden": "hello",
    }
    for key, expected_value in expected_top_level.items():
        assert params.string(key) == expected_value
    nested = params.namespace("nested")
    assert nested.string("overridden") == "I've been overridden"
def curriculum_from_params(params: Parameters,
                           language_mode: LanguageMode = LanguageMode.ENGLISH):
    """Build the (train, test) instance groups for the curriculum named in *params*.

    The ``curriculum`` parameter selects a pair of builders from the registry
    below; curricula with no dedicated test builder return ``[]`` as the test
    half.  Returns a tuple ``(training_instance_groups, test_instance_groups)``.
    """
    str_to_train_test_curriculum: Mapping[str, Tuple[
        CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]] = {
            "m6-deniz": (make_m6_curriculum, None),
            "each-object-by-itself": (
                build_each_object_by_itself_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "pursuit": (
                build_pursuit_curriculum,
                build_each_object_by_itself_curriculum_test,
            ),
            "m6-preposition": (build_m6_prepositions_curriculum, None),
            "m9-objects": (build_gaila_phase1_object_curriculum, None),
            "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
            "chinese-classifiers": (build_classifier_curriculum, None),
            "m9-relations": (build_gaila_phase1_relation_curriculum, None),
            "m9-events": (build_gaila_phase1_verb_curriculum, None),
            "m9-debug": (build_debug_curriculum_train, build_debug_curriculum_test),
            "m9-complete": (build_gaila_phase_1_curriculum, None),
            "m13-imprecise-size": (make_imprecise_size_curriculum, None),
            "m13-imprecise-temporal": (make_imprecise_temporal_descriptions, None),
            "m13-subtle-verb-distinction":
                (make_subtle_verb_distinctions_curriculum, None),
            "m13-object-restrictions":
                (build_functionally_defined_objects_curriculum, None),
            "m13-functionally-defined-objects": (
                build_functionally_defined_objects_train_curriculum,
                build_functionally_defined_objects_curriculum,
            ),
            "m13-generics": (build_generics_curriculum, None),
            "m13-complete": (build_gaila_m13_curriculum, None),
            "m13-verbs-with-dynamic-prepositions": (
                make_verb_with_dynamic_prepositions_curriculum,
                None,
            ),
            "m13-shuffled":
                (build_m13_shuffled_curriculum, build_gaila_m13_curriculum),
            "m13-relations": (make_prepositions_curriculum, None),
            "actions-and-generics-curriculum":
                (build_actions_and_generics_curriculum, None),
            "m15-object-noise-experiments": (
                build_object_learner_experiment_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "m18-integrated-learners-experiment": (
                integrated_pursuit_learner_experiment_curriculum,
                integrated_pursuit_learner_experiment_test,
            ),
        }
    curriculum_name = params.string("curriculum",
                                    str_to_train_test_curriculum.keys())
    # The m18 integrated experiment uses its own language generator;
    # everything else uses the phase-2 generator.
    language_generator = (
        integrated_experiment_language_generator(language_mode)
        if curriculum_name == "m18-integrated-learners-experiment" else
        phase2_language_generator(language_mode))
    if params.has_namespace("pursuit-curriculum-params"):
        pursuit_curriculum_params = params.namespace(
            "pursuit-curriculum-params")
    else:
        pursuit_curriculum_params = Parameters.empty()
    use_path_instead_of_goal = params.boolean("use-path-instead-of-goal",
                                              default=False)
    (training_instance_groups,
     test_instance_groups) = str_to_train_test_curriculum[curriculum_name]
    num_samples = params.optional_positive_integer("num_samples")
    # We need to be able to accept 0 as the number of noise objects but optional_integer doesn't currently
    # support specifying a range of acceptable values: https://github.com/isi-vista/vistautils/issues/142
    num_noise_objects = params.optional_integer("num_noise_objects")
    if curriculum_name == "pursuit":
        # Pursuit builders additionally take their own parameter namespace.
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                pursuit_curriculum_params=pursuit_curriculum_params,
            ),
            test_instance_groups(num_samples, num_noise_objects,
                                 language_generator)
            if test_instance_groups else [],
        )
    # optional argument to use path instead of goal
    elif use_path_instead_of_goal and curriculum_name in [
            "m13-complete",
            "m13-shuffled",
            "m13-verbs-with-dynamic-prepositions",
    ]:
        # NOTE(review): use_path_instead_of_goal is passed positionally as the
        # builders' fourth argument -- confirm the builder signatures agree.
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                use_path_instead_of_goal,
            ),
            test_instance_groups(num_samples, num_noise_objects,
                                 language_generator)
            if test_instance_groups else [],
        )
    elif curriculum_name in (
            "m15-object-noise-experiments",
            "m18-integrated-learners-experiment",
    ):
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                params=params.namespace_or_empty("train_curriculum"),
            ),
            # NOTE(review): the test split is built with a fixed 5 samples and
            # 0 noise objects regardless of num_samples/num_noise_objects --
            # confirm this is intended.
            test_instance_groups(
                5,
                0,
                language_generator,
                params=params.namespace_or_empty("test_curriculum"),
            ) if test_instance_groups else [],
        )
    # Default case: plain (num_samples, num_noise_objects, generator) builders.
    return (
        training_instance_groups(num_samples, num_noise_objects,
                                 language_generator),
        test_instance_groups(num_samples, num_noise_objects,
                             language_generator)
        if test_instance_groups else [],
    )
def learner_factory_from_params(
    params: Parameters,
    graph_logger: Optional[HypothesisLogger],
    language_mode: LanguageMode = LanguageMode.ENGLISH,
) -> Callable[[], TopLevelLanguageLearner]:  # type: ignore
    """Return a zero-argument factory building the learner named by the ``learner`` parameter.

    The factory is a closure so each call constructs a fresh learner.
    Raises ``RuntimeError`` only if an advertised learner option has no
    corresponding branch (a programming error, not a user error, since
    ``params.string`` already validates against the option list).
    """
    learner_type = params.string(
        "learner",
        [
            "pursuit",
            "object-subset",
            "preposition-subset",
            "attribute-subset",
            "verb-subset",
            "integrated-learner",
            "integrated-learner-recognizer-without-generics",
            "integrated-learner-recognizer",
            "pursuit-gaze",
            "integrated-object-only",
            "integrated-learner-params",
            "integrated-pursuit-attribute-only",
        ],
    )
    beam_size = params.positive_integer("beam_size", default=10)
    # Fixed seed so experiment runs are reproducible.
    rng = random.Random()
    rng.seed(0)
    perception_generator = GAILA_PHASE_1_PERCEPTION_GENERATOR
    objects = [YOU_HACK, ME_HACK]
    objects.extend(PHASE_1_CURRICULUM_OBJECTS)
    # Eval hack! This is specific to the Phase 1 ontology
    object_recognizer = ObjectRecognizer.for_ontology_types(
        objects,
        determiners=ENGLISH_DETERMINERS,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        perception_generator=perception_generator,
    )
    if learner_type == "pursuit":
        return lambda: ObjectPursuitLearner.from_parameters(
            params.namespace("pursuit"), graph_logger=graph_logger)
    elif learner_type == "pursuit-gaze":
        return lambda: IntegratedTemplateLearner(
            object_learner=PursuitObjectLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                ontology=GAILA_PHASE_2_ONTOLOGY,
                language_mode=language_mode,
                rank_gaze_higher=True,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
        )
    elif learner_type == "object-subset":
        return lambda: SubsetObjectLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH)
    elif learner_type == "attribute-subset":
        return lambda: SubsetAttributeLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "preposition-subset":
        return lambda: SubsetPrepositionLearner(
            # graph_logger=graph_logger,
            object_recognizer=object_recognizer,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "verb-subset":
        return lambda: SubsetVerbLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "integrated-learner":
        return lambda: IntegratedTemplateLearner(
            object_learner=SubsetObjectLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-learner-recognizer":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
            generics_learner=SimpleGenericsLearner(),
        )
    # BUG FIX: this branch previously matched the literal "ic", which is not
    # in the advertised option list (so it was unreachable), while the
    # advertised "integrated-learner-recognizer-without-generics" fell through
    # to RuntimeError("can't happen").  The branch body (the recognizer learner
    # minus the generics learner) matches the "without-generics" option.
    elif learner_type == "integrated-learner-recognizer-without-generics":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-object-only":
        object_learner_type = params.string(
            "object_learner_type",
            valid_options=["subset", "pbv", "pursuit"],
            default="subset",
        )
        if params.has_namespace("learner_params"):
            learner_params = params.namespace("learner_params")
        else:
            learner_params = params.empty(namespace_prefix="learner_params")
        object_learner_factory: Callable[[], TemplateLearner]
        if object_learner_type == "subset":

            def subset_factory() -> SubsetObjectLearnerNew:
                return SubsetObjectLearnerNew(  # type: ignore
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    beam_size=beam_size,
                    language_mode=language_mode,
                )

            object_learner_factory = subset_factory
        elif object_learner_type == "pbv":

            def pbv_factory() -> ProposeButVerifyObjectLearner:
                return ProposeButVerifyObjectLearner.from_params(  # type: ignore
                    learner_params)

            object_learner_factory = pbv_factory
        elif object_learner_type == "pursuit":

            def pursuit_factory() -> PursuitObjectLearnerNew:
                return PursuitObjectLearnerNew(  # type: ignore
                    learning_factor=learner_params.floating_point(
                        "learning_factor"),
                    graph_match_confirmation_threshold=learner_params.
                    floating_point("graph_match_confirmation_threshold"),
                    lexicon_entry_threshold=learner_params.floating_point(
                        "lexicon_entry_threshold"),
                    rng=rng,
                    smoothing_parameter=learner_params.floating_point(
                        "smoothing_parameter"),
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    language_mode=language_mode,
                )

            object_learner_factory = pursuit_factory
        else:
            # BUG FIX: previously interpolated the outer learner_type
            # ("integrated-object-only") rather than the offending
            # object_learner_type.  (Unreachable in practice because
            # params.string validates against valid_options.)
            raise RuntimeError(
                f"Invalid Object Learner Type Selected: {object_learner_type}")
        return lambda: IntegratedTemplateLearner(
            object_learner=object_learner_factory())
    elif learner_type == "integrated-learner-params":
        # Each sub-learner is built from its own (possibly empty) namespace.
        object_learner = build_object_learner_factory(  # type:ignore
            params.namespace_or_empty("object_learner"), beam_size,
            language_mode)
        attribute_learner = build_attribute_learner_factory(  # type:ignore
            params.namespace_or_empty("attribute_learner"), beam_size,
            language_mode)
        relation_learner = build_relation_learner_factory(  # type:ignore
            params.namespace_or_empty("relation_learner"), beam_size,
            language_mode)
        action_learner = build_action_learner_factory(  # type:ignore
            params.namespace_or_empty("action_learner"), beam_size,
            language_mode)
        plural_learner = build_plural_learner_factory(  # type:ignore
            params.namespace_or_empty("plural_learner"), beam_size,
            language_mode)
        return lambda: IntegratedTemplateLearner(
            object_learner=object_learner,
            attribute_learner=attribute_learner,
            relation_learner=relation_learner,
            action_learner=action_learner,
            functional_learner=FunctionalLearner(language_mode=language_mode)
            if params.boolean("include_functional_learner", default=True) else
            None,
            generics_learner=SimpleGenericsLearner() if params.boolean(
                "include_generics_learner", default=True) else None,
            plural_learner=plural_learner,
            suppress_error=params.boolean("suppress_error", default=True),
        )
    elif learner_type == "integrated-pursuit-attribute-only":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=PursuitAttributeLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                rank_gaze_higher=False,
                ontology=GAILA_PHASE_1_ONTOLOGY,
                language_mode=language_mode,
            ),
        )
    else:
        raise RuntimeError("can't happen")
def integrated_experiment_entry_point(params: Parameters) -> None:
    """Build curricula and submit one integrated-learners experiment per curriculum variant.

    First registers one curriculum-generation job per (noise, shuffle)
    combination, then one experiment job depending on each, and finally
    writes the Pegasus workflow description.
    """
    initialize_vista_pegasus_wrapper(params)
    baseline_parameters = params.namespace("integrated_learners_experiment")
    pursuit_resource_request_params = params.namespace(
        "pursuit_resource_request")

    # This code is commented out but may be used in the near future to add language ablation
    # capabilities to this curriculum.
    # get the minimum and maximum accuracy of the language with the situation
    # min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    # max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    # num_language_accuracy_increment = params.integer(
    #     "num_language_accuracy_increment", default=5
    # )
    # values_for_accuracy = np.linspace(
    #     min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    # )

    # Get if attributes or relations should be included
    include_attributes = params.boolean("include_attributes", default=True)
    include_relations = params.boolean("include_relations", default=True)
    # BUG FIX: the throttle was previously applied to category
    # "pursuit_job_limit", but the jobs below are submitted under category
    # "pursuit" (compare object_language_ablation_runner_entry_point), so the
    # limit never took effect.
    limit_jobs_for_category(
        "pursuit", params.integer("num_pursuit_learners_active", default=8))
    curriculum_repository_path = params.creatable_directory(
        "curriculum_repository_path")
    # Job to build desired curriculum(s) which our learners use
    curriculum_dependencies = immutableset(
        (
            CURRICULUM_NAME_FORMAT.format(
                noise=add_noise,
                shuffled=shuffle,
                relations=include_relations,
                attributes=include_attributes,
            ),
            run_python_on_parameters(
                Locator(
                    CURRICULUM_NAME_FORMAT.format(
                        noise=add_noise,
                        shuffled=shuffle,
                        relations=include_relations,
                        attributes=include_attributes,
                    ).split("-")),
                generate_curriculum_script,
                baseline_parameters.unify({
                    "train_curriculum":
                        Parameters.from_mapping(CURRICULUM_PARAMS).unify({
                            "add_noise": add_noise,
                            "shuffled": shuffle,
                            "include_attributes": include_attributes,
                            "include_relations": include_relations,
                        }).as_mapping()
                }).unify(FIXED_PARAMETERS).unify({
                    "curriculum_repository_path": curriculum_repository_path
                }),
                depends_on=[],
            ),
            Parameters.from_mapping(CURRICULUM_PARAMS).unify({
                "add_noise": add_noise,
                "shuffled": shuffle,
                "include_attributes": include_attributes,
                "include_relations": include_relations,
            }),
        ) for add_noise in (True, False) for shuffle in (True, False))

    # These are loop-invariant, so read them once rather than per curriculum.
    object_learner_type = params.string(
        "object_learner.learner_type",
        valid_options=["pursuit", "subset", "pbv"],
        default="pursuit",
    )
    # BUG FIX: the key was previously "attribute_learner.learner__type"
    # (doubled underscore), inconsistent with the object/relation keys, so a
    # user-specified attribute learner type was silently ignored.
    attribute_learner_type = params.string(
        "attribute_learner.learner_type",
        valid_options=["none", "pursuit", "subset"],
        default="pursuit",
    )
    relation_learner_type = params.string(
        "relation_learner.learner_type",
        valid_options=["none", "pursuit", "subset"],
        default="pursuit",
    )
    # Pursuit jobs get a dedicated resource request and job category.
    uses_pursuit = "pursuit" in [
        object_learner_type, attribute_learner_type, relation_learner_type
    ]

    # jobs to build experiment
    for (curriculum_str, curriculum_dep,
         curr_params) in curriculum_dependencies:
        experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
            curriculum_name=curriculum_str.replace("-", "+"),
            object_learner=object_learner_type,
            attribute_learner=attribute_learner_type,
            relation_learner=relation_learner_type,
        )
        experiment_name = Locator(experiment_name_string.split("-"))
        # Note that the input parameters should include the root params and
        # anything else we want.
        experiment_params = baseline_parameters.unify(FIXED_PARAMETERS).unify({
            "experiment": experiment_name_string,
            "experiment_group_dir": directory_for(experiment_name),
            "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
            "learner_logging_path": directory_for(experiment_name),
            "log_learner_state": True,
            "resume_from_latest_logged_state": True,
            "load_from_curriculum_repository": curriculum_repository_path,
            "train_curriculum": curr_params,
        })
        run_python_on_parameters(
            experiment_name,
            log_experiment_script,
            experiment_params,
            depends_on=[curriculum_dep],
            resource_request=SlurmResourceRequest.from_parameters(
                pursuit_resource_request_params) if uses_pursuit else None,
            category="pursuit" if uses_pursuit else "subset",
            use_pypy=True,
        )
    write_workflow_description()
def object_language_ablation_runner_entry_point(params: Parameters) -> None:
    """Submit one experiment job for every object-language-ablation configuration.

    Sweeps scene size, language accuracy, and every learner configuration in
    LEARNER_VALUES_TO_PARAMS, then writes the workflow description.
    """
    initialize_vista_pegasus_wrapper(params)
    baseline_parameters = params.namespace("object_language_ablation")
    pursuit_resource_request_params = params.namespace(
        "pursuit_resource_request")

    # Range of scene sizes to sweep.
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)

    # Range of language accuracies, sampled at evenly spaced points.
    min_language_accuracy = params.floating_point("min_language_accuracy",
                                                  default=0.1)
    max_language_accuracy = params.floating_point("max_language_accuracy",
                                                  default=0.5)
    num_language_accuracy_increment = params.integer(
        "num_language_accuracy_increment", default=5)
    accuracy_values = np.linspace(min_language_accuracy,
                                  max_language_accuracy,
                                  num_language_accuracy_increment)

    # Cap how many pursuit learners run concurrently.
    limit_jobs_for_category(
        "pursuit", params.integer("num_pursuit_learners_active", default=8))

    for num_objects in range(min_num_objects, max_num_objects + 1):
        for language_accuracy in accuracy_values:
            for learner_type, learner_configs in LEARNER_VALUES_TO_PARAMS.items():
                for params_str, learner_params in learner_configs:
                    # The experiment name doubles as the job name and the
                    # results-directory name.
                    name_string = EXPERIMENT_NAME_FORMAT.format(
                        num_objects=num_objects,
                        language_accuracy=language_accuracy,
                        learner_type=learner_type,
                        learner_params=params_str,
                    )
                    experiment_name = Locator(name_string.split("-"))
                    # Note that the input parameters should include the root
                    # params and anything else we want.
                    experiment_params = baseline_parameters.unify(
                        FIXED_PARAMETERS).unify({
                            "experiment": name_string,
                            "experiment_group_dir":
                                directory_for(experiment_name),
                            "hypothesis_log_dir":
                                directory_for(experiment_name) / "hypotheses",
                            "learner_logging_path":
                                directory_for(experiment_name),
                            "log_learner_state": True,
                            "resume_from_latest_logged_state": True,
                            "train_curriculum": {
                                "accurate_language_percentage":
                                    float(language_accuracy)
                            },
                            "object_learner_type": learner_type,
                            "object_learner": learner_params,
                            # We subtract one because the target object is a given
                            "num_noise_objects": num_objects - 1,
                        })
                    # Pursuit learners get a dedicated Slurm resource request.
                    resource_request = (SlurmResourceRequest.from_parameters(
                        pursuit_resource_request_params)
                                        if learner_type == "pursuit" else None)
                    run_python_on_parameters(
                        experiment_name,
                        log_experiment_script,
                        experiment_params,
                        depends_on=[],
                        resource_request=resource_request,
                        category=learner_type,
                    )
    write_workflow_description()
def curriculum_from_params(params: Parameters,
                           language_mode: LanguageMode = LanguageMode.ENGLISH):
    """Look up the curriculum named by the ``curriculum`` parameter and build it.

    Returns a tuple ``(training_instance_groups, test_instance_groups)``;
    curricula without a dedicated test builder return ``[]`` as the test half.
    """
    curriculum_registry: Mapping[str, Tuple[
        CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]] = {
            "m6-deniz": (make_m6_curriculum, None),
            "each-object-by-itself": (
                build_each_object_by_itself_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "pursuit": (
                build_pursuit_curriculum,
                build_each_object_by_itself_curriculum_test,
            ),
            "m6-preposition": (build_m6_prepositions_curriculum, None),
            "m9-objects": (build_gaila_phase1_object_curriculum, None),
            "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
            "m9-relations": (build_gaila_phase1_relation_curriculum, None),
            "m9-events": (build_gaila_phase1_verb_curriculum, None),
            "m9-debug": (build_debug_curriculum_train,
                         build_debug_curriculum_test),
            "m9-complete": (build_gaila_phase_1_curriculum, None),
            "m13-imprecise-size": (make_imprecise_size_curriculum, None),
            "m13-imprecise-temporal": (make_imprecise_temporal_descriptions,
                                       None),
            "m13-subtle-verb-distinction":
                (make_subtle_verb_distinctions_curriculum, None),
            "m13-object-restrictions":
                (build_functionally_defined_objects_curriculum, None),
            "m13-functionally-defined-objects": (
                build_functionally_defined_objects_train_curriculum,
                build_functionally_defined_objects_curriculum,
            ),
            "m13-generics": (build_generics_curriculum, None),
            "m13-complete": (build_gaila_m13_curriculum, None),
            "m13-verbs-with-dynamic-prepositions": (
                make_verb_with_dynamic_prepositions_curriculum,
                None,
            ),
            "m13-shuffled": (build_m13_shuffled_curriculum,
                             build_gaila_m13_curriculum),
            "m13-relations": (make_prepositions_curriculum, None),
        }
    curriculum_name = params.string("curriculum", curriculum_registry.keys())
    language_generator = phase2_language_generator(language_mode)
    # Pursuit curricula take an extra namespace of tuning parameters.
    if params.has_namespace("pursuit-curriculum-params"):
        pursuit_curriculum_params = params.namespace(
            "pursuit-curriculum-params")
    else:
        pursuit_curriculum_params = Parameters.empty()
    training_instance_groups, test_instance_groups = curriculum_registry[
        curriculum_name]
    num_samples = params.optional_positive_integer("num_samples")
    num_noise_objects = params.optional_positive_integer("num_noise_objects")
    if curriculum_name == "pursuit":
        train = training_instance_groups(
            num_samples,
            num_noise_objects,
            language_generator,
            pursuit_curriculum_params=pursuit_curriculum_params,
        )
    else:
        train = training_instance_groups(num_samples, num_noise_objects,
                                         language_generator)
    if test_instance_groups:
        test = test_instance_groups(num_samples, num_noise_objects,
                                    language_generator)
    else:
        test = []
    return (train, test)
def learner_factory_from_params(
    params: Parameters,
    graph_logger: Optional[HypothesisLogger],
    language_mode: LanguageMode = LanguageMode.ENGLISH,
) -> Callable[[], TopLevelLanguageLearner]:  # type: ignore
    """Return a zero-argument factory building the learner named by the ``learner`` parameter.

    Chinese is only supported for the two integrated learner variants; any
    other combination raises ``RuntimeError``.  The returned factory is a
    closure, so each call constructs a fresh learner.
    """
    learner_type = params.string(
        "learner",
        [
            "pursuit",
            "object-subset",
            "preposition-subset",
            "attribute-subset",
            "verb-subset",
            "integrated-learner",
            "integrated-learner-recognizer",
            "pursuit-gaze",
        ],
    )
    beam_size = params.positive_integer("beam_size", default=10)
    # The non-integrated learners are English-only.
    if language_mode == LanguageMode.CHINESE and learner_type not in [
            "integrated-learner",
            "integrated-learner-recognizer",
    ]:
        raise RuntimeError(
            "Only able to test Chinese with integrated learner.")
    # Fixed seed so experiment runs are reproducible.
    rng = random.Random()
    rng.seed(0)
    perception_generator = GAILA_PHASE_1_PERCEPTION_GENERATOR
    objects = [YOU_HACK, ME_HACK]
    objects.extend(PHASE_1_CURRICULUM_OBJECTS)
    # Eval hack! This is specific to the Phase 1 ontology
    object_recognizer = ObjectRecognizer.for_ontology_types(
        objects,
        determiners=ENGLISH_DETERMINERS,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        perception_generator=perception_generator,
    )
    if learner_type == "pursuit":
        return lambda: ObjectPursuitLearner.from_parameters(
            params.namespace("pursuit"), graph_logger=graph_logger)
    elif learner_type == "pursuit-gaze":
        # Pursuit object learner with gaze ranked higher, plus subset
        # learners for attributes/relations/actions.
        return lambda: IntegratedTemplateLearner(
            object_learner=PursuitObjectLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                ontology=GAILA_PHASE_2_ONTOLOGY,
                language_mode=language_mode,
                rank_gaze_higher=True,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
        )
    elif learner_type == "object-subset":
        return lambda: SubsetObjectLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH)
    elif learner_type == "attribute-subset":
        return lambda: SubsetAttributeLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "preposition-subset":
        return lambda: SubsetPrepositionLearner(
            # graph_logger=graph_logger,
            object_recognizer=object_recognizer,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "verb-subset":
        return lambda: SubsetVerbLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "integrated-learner":
        # All sub-learners learned from scratch (subset learners).
        return lambda: IntegratedTemplateLearner(
            object_learner=SubsetObjectLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-learner-recognizer":
        # Same as "integrated-learner" but objects come from the pre-built
        # recognizer instead of being learned.
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    else:
        # Unreachable: params.string above validates against the option list.
        raise RuntimeError("can't happen")