def test_double_context_fail(self):
    """Supplying both the deprecated *context* and the new *included_context* must raise."""
    loader = YAMLParametersLoader()
    with self.assertRaises(ParameterError):
        loader.load(
            f='foo: "foo"',
            context=Parameters.empty(),
            included_context=Parameters.empty(),
        )
def test_namespace_prefix(self):
    """namespace_prefix tracks the chain of namespace names used to reach the parameters."""
    nested = Parameters.from_mapping({"hello": {"world": {"foo": "bar"}}})
    inner = nested.namespace("hello").namespace("world")
    assert inner.namespace_prefix == ("hello", "world")
    # An explicitly supplied prefix is preserved on empty parameters.
    explicit = Parameters.empty(namespace_prefix=("foo", ))
    assert explicit.namespace_prefix == ("foo", )
    # namespace_or_empty accumulates the prefix even when nothing exists under the name.
    chained = Parameters.empty().namespace_or_empty("foo").namespace_or_empty("bar")
    assert chained.namespace_prefix == ("foo", "bar")
def add_saga_cluster_to_sites(
        sites_catalog: SiteCatalog,
        params: Parameters = Parameters.empty()) -> None:
    """Register the SAGA slurm cluster as a Pegasus site in *sites_catalog*.

    The scratch location, home directory, and data configuration may be
    overridden through *params* (``home_dir``, ``saga_shared_scratch``,
    ``data_configuration``).
    """
    home_dir = params.string("home_dir", default=str(Path.home().absolute()))
    data_config = params.string("data_configuration", default="sharedfs")
    scratch_dir = params.string(
        "saga_shared_scratch", default=f"{home_dir}/workflows/shared-scratch")

    saga_site = Site("saga", arch=Arch.X86_64, os_type=OS.LINUX)
    saga_site.add_directories(
        Directory(Directory.SHARED_SCRATCH, scratch_dir).add_file_servers(
            FileServer("file://" + scratch_dir, Operation.ALL)))
    saga_site.add_env(
        key="PEGASUS_HOME",
        value="/nas/gaia/shared/cluster/pegasus5/pegasus-5.0.0")

    # Profiles: submit through glite to slurm rather than vanilla Condor.
    saga_site.add_pegasus_profile(
        style="glite",
        auxillary_local=True,
        data_configuration=data_config)
    saga_site.add_condor_profile(grid_resource="batch slurm")

    sites_catalog.add_sites(saga_site)
def pre_observer(
    self,
    *,
    params: Parameters = Parameters.empty(),
    experiment_group_dir: Optional[Path] = None,
) -> "DescriptionObserver":  # type: ignore
    """Create the observer applied before learning updates.

    *params* controls whether candidate-accuracy and precision/recall
    sub-observers are attached and where their text logs are written;
    log files default into *experiment_group_dir* when one is given.
    """
    group_prefix = f"{experiment_group_dir}/" if experiment_group_dir else ""

    track_accuracy = params.boolean("include_acc_observer", default=False)
    log_accuracy = params.boolean("accuracy_to_txt", default=False)
    log_accuracy_path = params.string(
        "accuracy_logging_path", default=group_prefix + "accuracy_pre_out.txt"
    )

    track_precision_recall = params.boolean("include_pr_observer", default=False)
    log_precision_recall = params.boolean("log_pr", default=False)
    # NOTE(review): default file name says "post" although this is the
    # pre-observer -- confirm whether that is intentional.
    log_precision_recall_path = params.string(
        "pr_log_path", default=group_prefix + "pr_post_out.txt"
    )

    accuracy_observer = None
    if track_accuracy:
        accuracy_observer = CandidateAccuracyObserver(
            name="Pre-observer-acc",
            accuracy_to_txt=log_accuracy,
            txt_path=log_accuracy_path,
        )

    precision_recall_observer = None
    if track_precision_recall:
        precision_recall_observer = PrecisionRecallObserver(
            name="Pre-observer-pr",
            make_report=log_precision_recall,
            txt_path=log_precision_recall_path,
        )

    return HTMLLoggerPreObserver(
        name="Pre-observer",
        html_logger=self,
        candidate_accuracy_observer=accuracy_observer,
        precision_recall_observer=precision_recall_observer,
    )
def build_each_object_by_itself_curriculum_test(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[HighLevelSemanticsSituation,
                                          LinearizedDependencyTree],
    *,
    params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:  # pylint: disable=unused-argument
    """Test curriculum: a single group presenting each object in isolation."""
    single_object_group = _make_each_object_by_itself_curriculum(
        num_samples, num_noise_objects, language_generator)
    return [single_object_group]
def test_absents(self):
    """Every optional_* accessor returns None when the requested key is absent."""
    empty_params = Parameters.empty()
    optional_accessors = (
        "optional_arbitrary_list",
        "optional_boolean",
        "optional_creatable_directory",
        "optional_creatable_empty_directory",
        "optional_creatable_file",
        "optional_existing_directory",
        "optional_existing_file",
        "optional_floating_point",
        "optional_integer",
        "optional_namespace",
        "optional_positive_integer",
        "optional_string",
    )
    for accessor_name in optional_accessors:
        assert getattr(empty_params, accessor_name)("foo") is None
def build_pursuit_curriculum(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    pursuit_curriculum_params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:
    """Build the simple pursuit curriculum, configured via *pursuit_curriculum_params*."""
    cfg = pursuit_curriculum_params
    num_instances = cfg.integer(
        "num_instances", default=num_samples if num_samples else 10
    )
    num_noise_instances = cfg.integer(
        "num_noise_instances", default=num_noise_objects if num_noise_objects else 2
    )
    num_objects_in_instance = cfg.integer("num_objects_in_instance", default=3)
    add_gaze = cfg.boolean("add_gaze", default=False)
    prob_given = cfg.floating_point("prob_given", default=1.0)
    prob_not_given = cfg.floating_point("prob_not_given", default=0.0)

    # Fixed seed keeps curriculum generation reproducible across runs.
    rng = random.Random()
    rng.seed(0)
    gaze_perceiver = GazePerceivedNoisily(
        rng=rng,
        prob_gaze_perceived_given_gaze=prob_given,
        prob_gaze_perceived_given_not_gaze=prob_not_given,
    )
    perception_generator = HighLevelSemanticsSituationToDevelopmentalPrimitivePerceptionGenerator(
        ontology=GAILA_PHASE_2_ONTOLOGY, gaze_strategy=gaze_perceiver
    )

    return [
        make_simple_pursuit_curriculum(
            target_objects=M6_CURRICULUM_ALL_OBJECTS,
            num_instances=num_instances,
            num_objects_in_instance=num_objects_in_instance,
            num_noise_instances=num_noise_instances,
            language_generator=language_generator,
            add_gaze=add_gaze,
            perception_generator=perception_generator,
        )
    ]
def test_observer(
    self,
    *,
    params: Parameters = Parameters.empty(),
    experiment_group_dir: Optional[Path] = None,
) -> "DescriptionObserver":  # type: ignore
    """Create the observer used when decoding the test set.

    The accuracy and precision/recall sub-observers can write per-iteration
    values to text files (e.g. to graph later); paths come from *params*
    and default into *experiment_group_dir* when one is given.
    """
    group_prefix = f"{experiment_group_dir}/" if experiment_group_dir else ""

    track_accuracy = params.boolean("include_acc_observer", default=True)
    log_accuracy = params.boolean("accuracy_to_txt", default=False)
    log_accuracy_path = params.string(
        "accuracy_logging_path", default=group_prefix + "accuracy_test_out.txt"
    )

    track_precision_recall = params.boolean("include_pr_observer", default=False)
    log_precision_recall = params.boolean("log_pr", default=False)
    log_precision_recall_path = params.string(
        "pr_log_path", default=group_prefix + "pr_test_out.txt"
    )

    return HTMLLoggerPostObserver(
        name="t-observer",
        html_logger=self,
        candidate_accuracy_observer=CandidateAccuracyObserver(
            name="Test-observer-acc",
            accuracy_to_txt=log_accuracy,
            txt_path=log_accuracy_path,
        )
        if track_accuracy
        else None,
        precision_recall_observer=PrecisionRecallObserver(
            name="Test-observer-pr",
            make_report=log_precision_recall,
            txt_path=log_precision_recall_path,
        )
        if track_precision_recall
        else None,
        test_mode=True,
    )
def add_local_nas_to_sites(
        sites_catalog: SiteCatalog,
        params: Parameters = Parameters.empty()) -> None:
    """Register the "local" site (NAS-backed scratch and storage) in *sites_catalog*.

    Directory locations can be overridden via *params* (``home_dir``,
    ``local_shared_scratch``, ``local_storage``).
    """
    home_dir = params.string("home_dir", default=str(Path.home().absolute()))
    scratch_dir = params.string(
        "local_shared_scratch", default=f"{home_dir}/workflows/scratch")
    storage_dir = params.string(
        "local_storage", default=f"{home_dir}/workflows/output")

    local_site = Site("local", arch=Arch.X86_64, os_type=OS.LINUX)
    local_site.add_directories(
        Directory(Directory.SHARED_SCRATCH, scratch_dir).add_file_servers(
            FileServer("file://" + scratch_dir, Operation.ALL)),
        Directory(Directory.LOCAL_STORAGE, storage_dir).add_file_servers(
            FileServer("file://" + storage_dir, Operation.ALL)),
    )
    sites_catalog.add_sites(local_site)
def test_optional_defaults(self):
    """optional_* accessors fall back to their *default* when the key is absent."""
    # pylint: disable=unexpected-keyword-arg
    empty_params = Parameters.empty()
    default_list = [False]
    assert empty_params.optional_arbitrary_list(
        "foo", default=default_list) == default_list
    assert empty_params.optional_boolean("foo", default=True)
    assert empty_params.optional_floating_point("foo", default=-1.5) == -1.5
    assert empty_params.optional_integer("foo", default=-5) == -5
    assert empty_params.optional_positive_integer("foo", default=5) == 5
    assert empty_params.optional_string("foo", default="test") == "test"
    # A default that violates the accepted range is still rejected.
    with self.assertRaises(ParameterError):
        empty_params.optional_floating_point(
            "foo", default=-1.5, valid_range=Range.closed(0.0, 10.0))
def build_object_learner_experiment_curriculum_train(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:
    """Training curriculum in which a fraction of instances get deliberately wrong language.

    With probability ``accurate_language_percentage`` (default 0.5) an
    instance keeps its true description; otherwise the description is
    replaced by language for a randomly chosen object *not* present in the
    situation, and the replacement is flagged ``accurate=False``.
    """
    situations = make_multiple_object_situation(
        num_samples, num_noise_objects, language_generator
    )
    accurate_language_chance = params.floating_point(
        "accurate_language_percentage", default=0.5
    )
    output_situations = []
    # Two independent seeds: the module-level `random` drives the
    # keep-vs-replace coin flips; `rng` picks the wrong object / language.
    random.seed(params.integer("random_seed", default=0))
    rng = RandomChooser.for_seed(params.integer("language_random_seed", default=0))
    for (situation, language, perception) in situations.instances():
        if random.random() <= accurate_language_chance:
            output_language = language
        else:
            # Make Invalid Language
            if situation and isinstance(situation, HighLevelSemanticsSituation):
                # First, gather all OntologyNodes which aren't already present in the situation
                present_ontology_nodes = [
                    _object.ontology_node for _object in situation.all_objects
                ]
                valid_other_objects = [
                    node
                    for node in PHASE_1_CURRICULUM_OBJECTS
                    if node not in present_ontology_nodes
                ]
                # Then choose one at random
                chosen_ontology_node = rng.choice(valid_other_objects)
                # Make a fake situation with just this object in it, ignoring colors
                wrong_situation = HighLevelSemanticsSituation(
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    salient_objects=[
                        SituationObject.instantiate_ontology_node(
                            chosen_ontology_node, ontology=GAILA_PHASE_2_ONTOLOGY
                        )
                    ],
                    syntax_hints=[IGNORE_COLORS],
                )
                # Generate the language as if it came from this fake situation rather than the original one
                fake_language = only(
                    language_generator.generate_language(wrong_situation, chooser=rng)
                )
                # Rebuild the tree with accurate=False so downstream consumers
                # can tell this description is intentionally wrong.
                output_language = LinearizedDependencyTree(
                    dependency_tree=fake_language.dependency_tree,
                    surface_token_order=fake_language.surface_token_order,
                    accurate=False,
                )
            else:
                raise RuntimeError(
                    f"Unable to make invalid language without a situation of type HighlevelSemanticsSituation. Got situation: {situation}"
                )
        output_situations.append((situation, output_language, perception))
    return [
        AblatedLanguageSituationsInstanceGroup(
            name=f"{situations.name()}_ablated", instances=output_situations
        )
    ]
def configure_saga_properities(  # pylint: disable=unused-argument
        properties: Properties,
        params: Parameters = Parameters.empty()) -> None:
    """Set the Pegasus properties used when running on the SAGA cluster.

    NOTE: "properities" is a typo, but the name is kept as-is because
    callers refer to this function by it. *params* is currently unused.
    """
    properties["pegasus.data.configuration"] = "sharedfs"
    properties["pegasus.metrics.app"] = "SAGA"
    # No DAGMan retries: let job failures surface immediately.
    properties["dagman.retry"] = "0"
def learner_factory_from_params(
    params: Parameters,
    graph_logger: Optional[HypothesisLogger],
    language_mode: LanguageMode = LanguageMode.ENGLISH,
) -> Callable[[], TopLevelLanguageLearner]:  # type: ignore
    """Return a zero-argument factory producing the learner named by ``params``.

    The "learner" string parameter selects the architecture (see the
    valid-options list below). A factory is returned rather than a learner
    instance so each experiment run can construct a fresh learner.

    Raises ``ParameterError`` (via ``params.string``) for an unknown learner
    name and ``RuntimeError`` for an invalid object-learner sub-type.
    """
    learner_type = params.string(
        "learner",
        [
            "pursuit",
            "object-subset",
            "preposition-subset",
            "attribute-subset",
            "verb-subset",
            "integrated-learner",
            "integrated-learner-recognizer-without-generics",
            "integrated-learner-recognizer",
            "pursuit-gaze",
            "integrated-object-only",
            "integrated-learner-params",
            "integrated-pursuit-attribute-only",
        ],
    )
    beam_size = params.positive_integer("beam_size", default=10)

    # Fixed seed so pursuit-style learners behave reproducibly.
    rng = random.Random()
    rng.seed(0)

    perception_generator = GAILA_PHASE_1_PERCEPTION_GENERATOR

    objects = [YOU_HACK, ME_HACK]
    objects.extend(PHASE_1_CURRICULUM_OBJECTS)

    # Eval hack! This is specific to the Phase 1 ontology
    object_recognizer = ObjectRecognizer.for_ontology_types(
        objects,
        determiners=ENGLISH_DETERMINERS,
        ontology=GAILA_PHASE_1_ONTOLOGY,
        language_mode=language_mode,
        perception_generator=perception_generator,
    )

    if learner_type == "pursuit":
        return lambda: ObjectPursuitLearner.from_parameters(
            params.namespace("pursuit"), graph_logger=graph_logger)
    elif learner_type == "pursuit-gaze":
        return lambda: IntegratedTemplateLearner(
            object_learner=PursuitObjectLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                ontology=GAILA_PHASE_2_ONTOLOGY,
                language_mode=language_mode,
                rank_gaze_higher=True,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
        )
    elif learner_type == "object-subset":
        return lambda: SubsetObjectLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH)
    elif learner_type == "attribute-subset":
        return lambda: SubsetAttributeLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "preposition-subset":
        return lambda: SubsetPrepositionLearner(
            # graph_logger=graph_logger,
            object_recognizer=object_recognizer,
            ontology=GAILA_PHASE_1_ONTOLOGY,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "verb-subset":
        return lambda: SubsetVerbLearner(
            ontology=GAILA_PHASE_1_ONTOLOGY,
            object_recognizer=object_recognizer,
            language_mode=LanguageMode.ENGLISH,
        )
    elif learner_type == "integrated-learner":
        return lambda: IntegratedTemplateLearner(
            object_learner=SubsetObjectLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-learner-recognizer":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
            generics_learner=SimpleGenericsLearner(),
        )
    # BUG FIX: this branch previously tested the literal "ic", which is not in
    # the valid-options list above, so selecting the advertised option
    # "integrated-learner-recognizer-without-generics" fell through to the
    # final "can't happen" RuntimeError and the branch itself was unreachable.
    elif learner_type == "integrated-learner-recognizer-without-generics":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=SubsetAttributeLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            relation_learner=SubsetRelationLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            action_learner=SubsetVerbLearnerNew(
                ontology=GAILA_PHASE_2_ONTOLOGY,
                beam_size=beam_size,
                language_mode=language_mode,
            ),
            functional_learner=FunctionalLearner(language_mode=language_mode),
        )
    elif learner_type == "integrated-object-only":
        object_learner_type = params.string(
            "object_learner_type",
            valid_options=["subset", "pbv", "pursuit"],
            default="subset",
        )
        if params.has_namespace("learner_params"):
            learner_params = params.namespace("learner_params")
        else:
            # NOTE(review): namespace_prefix is passed a plain string here
            # although other call sites use a tuple -- confirm against the
            # Parameters.empty signature.
            learner_params = params.empty(namespace_prefix="learner_params")

        object_learner_factory: Callable[[], TemplateLearner]
        if object_learner_type == "subset":

            def subset_factory() -> SubsetObjectLearnerNew:
                # A fresh subset learner per factory call.
                return SubsetObjectLearnerNew(  # type: ignore
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    beam_size=beam_size,
                    language_mode=language_mode,
                )

            object_learner_factory = subset_factory
        elif object_learner_type == "pbv":

            def pbv_factory() -> ProposeButVerifyObjectLearner:
                return ProposeButVerifyObjectLearner.from_params(  # type: ignore
                    learner_params)

            object_learner_factory = pbv_factory
        elif object_learner_type == "pursuit":

            def pursuit_factory() -> PursuitObjectLearnerNew:
                return PursuitObjectLearnerNew(  # type: ignore
                    learning_factor=learner_params.floating_point(
                        "learning_factor"),
                    graph_match_confirmation_threshold=learner_params.
                    floating_point("graph_match_confirmation_threshold"),
                    lexicon_entry_threshold=learner_params.floating_point(
                        "lexicon_entry_threshold"),
                    rng=rng,
                    smoothing_parameter=learner_params.floating_point(
                        "smoothing_parameter"),
                    ontology=GAILA_PHASE_2_ONTOLOGY,
                    language_mode=language_mode,
                )

            object_learner_factory = pursuit_factory
        else:
            # BUG FIX: previously interpolated the (valid) top-level
            # learner_type here instead of the offending object_learner_type.
            raise RuntimeError(
                f"Invalid Object Learner Type Selected: {object_learner_type}")
        return lambda: IntegratedTemplateLearner(
            object_learner=object_learner_factory())
    elif learner_type == "integrated-learner-params":
        # Each sub-learner is configured from its own (possibly absent)
        # parameter namespace.
        object_learner = build_object_learner_factory(  # type:ignore
            params.namespace_or_empty("object_learner"), beam_size,
            language_mode)
        attribute_learner = build_attribute_learner_factory(  # type:ignore
            params.namespace_or_empty("attribute_learner"), beam_size,
            language_mode)
        relation_learner = build_relation_learner_factory(  # type:ignore
            params.namespace_or_empty("relation_learner"), beam_size,
            language_mode)
        action_learner = build_action_learner_factory(  # type:ignore
            params.namespace_or_empty("action_learner"), beam_size,
            language_mode)
        plural_learner = build_plural_learner_factory(  # type:ignore
            params.namespace_or_empty("plural_learner"), beam_size,
            language_mode)
        return lambda: IntegratedTemplateLearner(
            object_learner=object_learner,
            attribute_learner=attribute_learner,
            relation_learner=relation_learner,
            action_learner=action_learner,
            functional_learner=FunctionalLearner(language_mode=language_mode)
            if params.boolean("include_functional_learner", default=True) else
            None,
            generics_learner=SimpleGenericsLearner() if params.boolean(
                "include_generics_learner", default=True) else None,
            plural_learner=plural_learner,
            suppress_error=params.boolean("suppress_error", default=True),
        )
    elif learner_type == "integrated-pursuit-attribute-only":
        return lambda: IntegratedTemplateLearner(
            object_learner=ObjectRecognizerAsTemplateLearner(
                object_recognizer=object_recognizer,
                language_mode=language_mode),
            attribute_learner=PursuitAttributeLearnerNew(
                learning_factor=0.05,
                graph_match_confirmation_threshold=0.7,
                lexicon_entry_threshold=0.7,
                rng=rng,
                smoothing_parameter=0.002,
                rank_gaze_higher=False,
                ontology=GAILA_PHASE_1_ONTOLOGY,
                language_mode=language_mode,
            ),
        )
    else:
        # All valid options are handled above; params.string already rejected
        # anything outside the valid-options list.
        raise RuntimeError("can't happen")
def curriculum_from_params(params: Parameters,
                           language_mode: LanguageMode = LanguageMode.ENGLISH):
    """Build the (train, test) instance groups named by the "curriculum" parameter.

    Returns a pair ``(training_instance_groups, test_instance_groups)``;
    the second element is ``[]`` when the selected curriculum has no test
    builder registered.
    """
    # Maps curriculum name -> (train builder, optional test builder).
    str_to_train_test_curriculum: Mapping[str, Tuple[
        CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]] = {
            "m6-deniz": (make_m6_curriculum, None),
            "each-object-by-itself": (
                build_each_object_by_itself_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "pursuit": (
                build_pursuit_curriculum,
                build_each_object_by_itself_curriculum_test,
            ),
            "m6-preposition": (build_m6_prepositions_curriculum, None),
            "m9-objects": (build_gaila_phase1_object_curriculum, None),
            "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
            "m9-relations": (build_gaila_phase1_relation_curriculum, None),
            "m9-events": (build_gaila_phase1_verb_curriculum, None),
            "m9-debug": (build_debug_curriculum_train,
                         build_debug_curriculum_test),
            "m9-complete": (build_gaila_phase_1_curriculum, None),
            "m13-imprecise-size": (make_imprecise_size_curriculum, None),
            "m13-imprecise-temporal": (make_imprecise_temporal_descriptions,
                                       None),
            "m13-subtle-verb-distinction":
                (make_subtle_verb_distinctions_curriculum, None),
            "m13-object-restrictions":
                (build_functionally_defined_objects_curriculum, None),
            "m13-functionally-defined-objects": (
                build_functionally_defined_objects_train_curriculum,
                build_functionally_defined_objects_curriculum,
            ),
            "m13-generics": (build_generics_curriculum, None),
            "m13-complete": (build_gaila_m13_curriculum, None),
            "m13-verbs-with-dynamic-prepositions": (
                make_verb_with_dynamic_prepositions_curriculum,
                None,
            ),
            "m13-shuffled": (build_m13_shuffled_curriculum,
                             build_gaila_m13_curriculum),
            "m13-relations": (make_prepositions_curriculum, None),
        }
    curriculum_name = params.string("curriculum",
                                    str_to_train_test_curriculum.keys())
    language_generator = phase2_language_generator(language_mode)
    # Only the pursuit curriculum consumes its own parameter namespace.
    if params.has_namespace("pursuit-curriculum-params"):
        pursuit_curriculum_params = params.namespace(
            "pursuit-curriculum-params")
    else:
        pursuit_curriculum_params = Parameters.empty()
    (training_instance_groups,
     test_instance_groups) = str_to_train_test_curriculum[curriculum_name]
    num_samples = params.optional_positive_integer("num_samples")
    num_noise_objects = params.optional_positive_integer("num_noise_objects")
    return (
        # Pursuit is the only builder that takes extra keyword parameters.
        training_instance_groups(num_samples, num_noise_objects,
                                 language_generator)
        if curriculum_name != "pursuit" else training_instance_groups(
            num_samples,
            num_noise_objects,
            language_generator,
            pursuit_curriculum_params=pursuit_curriculum_params,
        ),
        test_instance_groups(num_samples, num_noise_objects,
                             language_generator)
        if test_instance_groups else [],
    )
def test_object_from_parameters(self):
    """object_from_parameters should instantiate the class named by the "factory"
    key (resolved via *context*), fall back to *default_factory* when the
    parameter is missing, and raise ParameterError otherwise."""

    @attrs
    class TestObj:
        val: int = attrib(default=None,
                          validator=validators.optional(
                              validators.instance_of(int)))

        @staticmethod
        def from_parameters(params: Parameters) -> "TestObj":
            return TestObj(params.integer("my_int"))

    simple_params = Parameters.from_mapping(
        {"test": {
            "factory": "TestObj",
            "my_int": 5
        }})
    # locals() supplies the lookup context so "TestObj" can be resolved.
    self.assertEqual(
        TestObj(5),
        simple_params.object_from_parameters("test", TestObj,
                                             context=locals()),
    )

    # test when object needs no further parameters for instantiation
    @attrs
    class ArglessTestObj:
        pass

    argless_params = Parameters.from_mapping(
        {"test": {
            "factory": "ArglessTestObj"
        }})
    self.assertEqual(
        ArglessTestObj(),
        argless_params.object_from_parameters("test",
                                              ArglessTestObj,
                                              context=locals()),
    )

    # test default_creator creator
    # pylint: disable=unused-argument
    def default_creator(params: Parameters) -> int:
        return 42

    # test falling back to default creator
    self.assertEqual(
        42,
        Parameters.empty().object_from_parameters(
            "missing_param",
            expected_type=int,
            default_factory=default_creator),
    )
    # test missing parameter and no default creator
    with self.assertRaises(ParameterError):
        self.assertEqual(
            "fred",
            Parameters.empty().object_from_parameters("missing_param",
                                                      default_factory=None,
                                                      expected_type=str),
        )
    # test no specified or default creator
    with self.assertRaises(ParameterError):
        Parameters.empty().object_from_parameters("missing_param",
                                                  expected_type=int)
    # test default creator being invalid
    bad_default_creator = "foo"
    with self.assertRaises(ParameterError):
        Parameters.empty().object_from_parameters(
            "missing_param",
            expected_type=int,
            default_factory=bad_default_creator)
def integrated_pursuit_learner_experiment_curriculum(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:
    """Training curriculum for the m18 integrated-learner experiment.

    Builds an object sub-curriculum plus optional attribute and
    prepositional-relation sub-curricula over the integrated-experiment
    vocabulary, optionally mixing in noise objects/relations, then flattens
    everything to explicit instances (optionally shuffled).
    """
    # Load Parameters
    add_noise = params.boolean("add_noise", default=False)
    block_multiple_of_same_type = params.boolean(
        "block_multiple_of_same_type", default=True
    )
    include_targets_in_noise = params.boolean("include_targets_in_noise", default=False)
    min_noise_objects = params.integer("min_noise_objects", default=0)
    max_noise_objects = params.integer(
        "max_noise_objects", default=num_noise_objects if num_noise_objects else 10
    )
    min_noise_relations = params.integer("min_noise_relations", default=0)
    max_noise_relations = params.integer("max_noise_relations", default=5)

    # This value ensure that pursuit gets at least 6 instances of any example
    # As otherwise the lexicalization system might not lexicalize it
    # But if there's lots of variants for noise we don't want to have thousands of examples
    # As could happen combinatorially
    # NOTE(review): the denominator adds `min_noise_objects - max_noise_objects`,
    # which is <= 0, so the operands look swapped; with the defaults the outer
    # max(..., 1) hides this, but a config where the relation span equals the
    # object span would divide by zero -- confirm the intended formula.
    min_samples_per_noise_object_relation_pair = (
        max(
            6
            // (
                max_noise_relations
                - min_noise_relations
                + min_noise_objects
                - max_noise_objects
            ),
            1,
        )
        if add_noise
        else 6
    )
    if num_samples is None:
        num_samples = 50

    # Random Number Generator for Curriculum Use
    rng = random.Random()
    rng.seed(params.integer("random_seed", default=0))
    # Random Chooser for Curriculum Generation
    chooser = RandomChooser.for_seed(params.integer("chooser_seed", default=0))

    # Noise Elements: one candidate noise set of each size in
    # [min_noise_objects, max_noise_objects).
    noise_objects_sets: ImmutableSet[ImmutableSet[TemplateObjectVariable]] = immutableset(
        [
            immutableset(
                [
                    standard_object(
                        f"{i}_noise_object_{num}",
                        THING,
                        required_properties=[INTEGRATED_EXPERIMENT_PROP],
                    )
                    for num in range(i)
                ]
            )
            for i in range(min_noise_objects, max_noise_objects)
        ]
    )
    # NOTE(review): `noise_objects_sets.empty()` looks like a call to the
    # empty-set factory (whose result is falsy) rather than an emptiness
    # test; `not noise_objects_sets` may have been intended -- confirm.
    if noise_objects_sets.empty() or not add_noise:
        noise_objects_sets = immutableset(immutableset())
    target_objects = [
        standard_object(node.handle, node)
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
    ]
    # Color variants, excluding object types for which colors make no sense.
    target_color_objects = [
        standard_object(f"{node.handle}_{color.handle}", node, added_properties=[color])
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
        for color in INTEGRATED_EXPERIMENT_COLORS
        if node not in [ZUP, SPAD, DAYGIN, MAWG, TOMBUR, GLIM]
    ]

    # We use a max of 1 here to account for when noise values are not used as otherwise
    # We'd be multiplying by 0 and cause div by 0 errors
    samples_to_template_den = (
        len(target_objects)
        * max(len(noise_objects_sets), 1)
        * max((max_noise_relations - min_noise_relations), 1)
    )

    ordered_curriculum = [
        _single_object_described_curriculum(
            num_samples,
            target_objects,
            noise_objects_sets,
            min_noise_relations=min_noise_relations,
            max_noise_relations=max_noise_relations,
            add_noise=add_noise,
            chooser=chooser,
            samples_to_template_den=samples_to_template_den,
            block_multiple_of_same_type=block_multiple_of_same_type,
            language_generator=language_generator,
            include_targets_in_noise=include_targets_in_noise,
            min_samples=min_samples_per_noise_object_relation_pair,
        )
    ]
    if params.boolean("include_attributes", default=True):
        ordered_curriculum.append(
            _single_attribute_described_curriculum(
                num_samples,
                target_color_objects,
                noise_objects_sets,
                min_noise_relations=min_noise_relations,
                max_noise_relations=max_noise_relations,
                add_noise=add_noise,
                chooser=chooser,
                samples_to_template_den=samples_to_template_den,
                block_multiple_of_same_type=block_multiple_of_same_type,
                language_generator=language_generator,
                include_targets_in_noise=include_targets_in_noise,
                min_samples=min_samples_per_noise_object_relation_pair,
            )
        )
    if params.boolean("include_relations", default=True):
        ordered_curriculum.append(
            _prepositional_relation_described_curriculum(
                num_samples,
                noise_objects_sets,
                min_noise_relations=min_noise_relations,
                max_noise_relations=max_noise_relations,
                add_noise=add_noise,
                chooser=chooser,
                samples_to_template_den=samples_to_template_den,
                block_multiple_of_same_type=block_multiple_of_same_type,
                language_generator=language_generator,
                include_targets_in_noise=include_targets_in_noise,
                min_samples=min_samples_per_noise_object_relation_pair,
            )
        )
    # Convert the 'from situation instances' into explicit instances this allows for
    # 1) Less computation time on the learner experiment to generate the perception graphs
    # 2) Allows us to shuffle the output order which we otherwise can't do
    explicit_instances = [
        instance for sit in ordered_curriculum for instance in sit.instances()
    ]
    return [
        ExplicitWithSituationInstanceGroup(
            name="m18-integrated-learners-experiment",
            instances=tuple(shuffle_curriculum(explicit_instances, rng=rng))
            if params.boolean("shuffled", default=False)
            else tuple(explicit_instances),
        )
    ]
def integrated_pursuit_learner_experiment_test(
    num_samples: Optional[int],
    num_noise_objects: Optional[int],
    language_generator: LanguageGenerator[
        HighLevelSemanticsSituation, LinearizedDependencyTree
    ],
    *,
    params: Parameters = Parameters.empty(),
) -> Sequence[Phase1InstanceGroup]:  # pylint: disable=unused-argument
    """Noise-free test curriculum paired with
    integrated_pursuit_learner_experiment_curriculum.

    Uses different default seeds (1 rather than 0) so test instances are not
    identical to training instances. *num_noise_objects* is unused.
    """
    # Random Number Generator for Curriculum Use
    rng = random.Random()
    rng.seed(params.integer("random_seed", default=1))
    # Random Chooser for Curriculum Generation
    chooser = RandomChooser.for_seed(params.integer("chooser_seed", default=1))
    if num_samples is None:
        num_samples = 5
    target_objects = [
        standard_object(node.handle, node)
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
    ]
    # Color variants, excluding object types for which colors make no sense.
    target_color_objects = [
        standard_object(f"{node.handle}_{color.handle}", node, added_properties=[color])
        for node in INTEGRATED_EXPERIMENT_CURRICULUM_OBJECTS
        for color in INTEGRATED_EXPERIMENT_COLORS
        if node not in [ZUP, SPAD, DAYGIN, MAWG, TOMBUR, GLIM]
    ]
    ordered_curriculum = [
        _single_object_described_curriculum(
            num_samples,
            target_objects,
            # An empty noise set: the test curriculum is noise-free.
            immutableset(immutableset()),
            add_noise=False,
            chooser=chooser,
            block_multiple_of_same_type=True,
            language_generator=language_generator,
            min_samples=num_samples,
        )
    ]
    if params.boolean("include_attributes", default=True):
        ordered_curriculum.append(
            _single_attribute_described_curriculum(
                num_samples,
                target_color_objects,
                immutableset(immutableset()),
                add_noise=False,
                chooser=chooser,
                block_multiple_of_same_type=True,
                language_generator=language_generator,
                min_samples=num_samples,
            )
        )
    if params.boolean("include_relations", default=True):
        ordered_curriculum.append(
            _prepositional_relation_described_curriculum(
                num_samples,
                immutableset(immutableset()),
                add_noise=False,
                chooser=chooser,
                block_multiple_of_same_type=True,
                language_generator=language_generator,
            )
        )
    # Convert the 'from situation instances' into explicit instances this allows for
    # 1) Less computation time on the learner experiment to generate the perception graphs
    # 2) Allows us to shuffle the output order which we otherwise can't do
    explicit_instances = [
        instance for sit in ordered_curriculum for instance in sit.instances()
    ]
    return [
        ExplicitWithSituationInstanceGroup(
            name="m18-integrated-learners-experiment-test",
            instances=tuple(shuffle_curriculum(explicit_instances, rng=rng))
            if params.boolean("shuffled", default=False)
            else tuple(explicit_instances),
        )
    ]
def curriculum_from_params(params: Parameters,
                           language_mode: LanguageMode = LanguageMode.ENGLISH):
    """Build the (train, test) instance groups named by the "curriculum" parameter.

    Returns a pair ``(training_instance_groups, test_instance_groups)``;
    the second element is ``[]`` when the selected curriculum has no test
    builder registered.
    """
    # Maps curriculum name -> (train builder, optional test builder).
    str_to_train_test_curriculum: Mapping[str, Tuple[
        CURRICULUM_BUILDER, Optional[CURRICULUM_BUILDER]]] = {
            "m6-deniz": (make_m6_curriculum, None),
            "each-object-by-itself": (
                build_each_object_by_itself_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "pursuit": (
                build_pursuit_curriculum,
                build_each_object_by_itself_curriculum_test,
            ),
            "m6-preposition": (build_m6_prepositions_curriculum, None),
            "m9-objects": (build_gaila_phase1_object_curriculum, None),
            "m9-attributes": (build_gaila_phase1_attribute_curriculum, None),
            "chinese-classifiers": (build_classifier_curriculum, None),
            "m9-relations": (build_gaila_phase1_relation_curriculum, None),
            "m9-events": (build_gaila_phase1_verb_curriculum, None),
            "m9-debug": (build_debug_curriculum_train,
                         build_debug_curriculum_test),
            "m9-complete": (build_gaila_phase_1_curriculum, None),
            "m13-imprecise-size": (make_imprecise_size_curriculum, None),
            "m13-imprecise-temporal": (make_imprecise_temporal_descriptions,
                                       None),
            "m13-subtle-verb-distinction":
                (make_subtle_verb_distinctions_curriculum, None),
            "m13-object-restrictions":
                (build_functionally_defined_objects_curriculum, None),
            "m13-functionally-defined-objects": (
                build_functionally_defined_objects_train_curriculum,
                build_functionally_defined_objects_curriculum,
            ),
            "m13-generics": (build_generics_curriculum, None),
            "m13-complete": (build_gaila_m13_curriculum, None),
            "m13-verbs-with-dynamic-prepositions": (
                make_verb_with_dynamic_prepositions_curriculum,
                None,
            ),
            "m13-shuffled": (build_m13_shuffled_curriculum,
                             build_gaila_m13_curriculum),
            "m13-relations": (make_prepositions_curriculum, None),
            "actions-and-generics-curriculum":
                (build_actions_and_generics_curriculum, None),
            "m15-object-noise-experiments": (
                build_object_learner_experiment_curriculum_train,
                build_each_object_by_itself_curriculum_test,
            ),
            "m18-integrated-learners-experiment": (
                integrated_pursuit_learner_experiment_curriculum,
                integrated_pursuit_learner_experiment_test,
            ),
        }
    curriculum_name = params.string("curriculum",
                                    str_to_train_test_curriculum.keys())
    # The m18 experiment uses its own restricted-vocabulary language
    # generator; everything else uses the phase 2 generator.
    language_generator = (
        integrated_experiment_language_generator(language_mode)
        if curriculum_name == "m18-integrated-learners-experiment" else
        phase2_language_generator(language_mode))
    if params.has_namespace("pursuit-curriculum-params"):
        pursuit_curriculum_params = params.namespace(
            "pursuit-curriculum-params")
    else:
        pursuit_curriculum_params = Parameters.empty()
    use_path_instead_of_goal = params.boolean("use-path-instead-of-goal",
                                              default=False)
    (training_instance_groups,
     test_instance_groups) = str_to_train_test_curriculum[curriculum_name]
    num_samples = params.optional_positive_integer("num_samples")
    # We need to be able to accept 0 as the number of noise objects but optional_integer doesn't currently
    # support specifying a range of acceptable values: https://github.com/isi-vista/vistautils/issues/142
    num_noise_objects = params.optional_integer("num_noise_objects")

    if curriculum_name == "pursuit":
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                pursuit_curriculum_params=pursuit_curriculum_params,
            ),
            test_instance_groups(num_samples, num_noise_objects,
                                 language_generator)
            if test_instance_groups else [],
        )
    # optional argument to use path instead of goal
    elif use_path_instead_of_goal and curriculum_name in [
            "m13-complete",
            "m13-shuffled",
            "m13-verbs-with-dynamic-prepositions",
    ]:
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                use_path_instead_of_goal,
            ),
            test_instance_groups(num_samples, num_noise_objects,
                                 language_generator)
            if test_instance_groups else [],
        )
    elif curriculum_name in (
            "m15-object-noise-experiments",
            "m18-integrated-learners-experiment",
    ):
        # These experiment curricula take dedicated train/test parameter
        # namespaces; the test side uses fixed sizes (5 samples, 0 noise).
        return (
            training_instance_groups(
                num_samples,
                num_noise_objects,
                language_generator,
                params=params.namespace_or_empty("train_curriculum"),
            ),
            test_instance_groups(
                5,
                0,
                language_generator,
                params=params.namespace_or_empty("test_curriculum"),
            ) if test_instance_groups else [],
        )
    return (
        training_instance_groups(num_samples, num_noise_objects,
                                 language_generator),
        test_instance_groups(num_samples, num_noise_objects,
                             language_generator)
        if test_instance_groups else [],
    )