def test_dax_with_categories(tmp_path):
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "home_dir": str(tmp_path),
        }
    )
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping(
        {"input_file": multiply_input_file, "output_file": multiply_output_file, "x": 4}
    )
    multiply_job_category = "arithmetic"
    run_python_on_parameters(
        multiply_job_name,
        multiply_by_x_main,
        multiply_params,
        depends_on=[],
        category=multiply_job_category,
    )

    # Check that the multiply job has the appropriate category set in the DAX file
    dax_file = write_workflow_description()
    assert dax_file.exists()
    assert _job_in_dax_has_category(dax_file, multiply_job_name, multiply_job_category)
    assert not _job_in_dax_has_category(
        dax_file, multiply_job_name, "an-arbitrary-category"
    )

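# The helpers `_parse_parts` and `_job_in_dax_has_category` used by the tests in this
# file are defined elsewhere; the sketch below is a guess at what they might look like,
# assuming the Pegasus 5 YAML workflow format in which a job's DAGMan category lives
# under profiles -> dagman -> category, and assuming str(Locator) joins its parts with
# "/" (both are assumptions, not confirmed by this listing).
def _parse_parts(locator_str):
    # "jobs/multiply" -> ("jobs", "multiply")
    return tuple(locator_str.split("/"))


def _job_in_dax_has_category(dax_file, job_locator, category) -> bool:
    # Job names in the generated YAML join the locator parts with "_", e.g. "jobs_multiply"
    # (see test_dax_test_exclude_nodes_on_saga at the end of this file).
    job_name = "_".join(str(job_locator).split("/"))
    with dax_file.open("r") as dax:
        dax_yaml = load(dax, Loader=SafeLoader)
    for job in dax_yaml.get("jobs", []):
        if job.get("type") == "job" and job.get("name") == job_name:
            return job.get("profiles", {}).get("dagman", {}).get("category") == category
    return False
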
def test_not_clearing_ckpts(monkeypatch, tmp_path):
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "scavenge",
            "home_dir": str(tmp_path),
        }
    )
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping(
        {"input_file": multiply_input_file, "output_file": multiply_output_file, "x": 4}
    )
    multiple_dir = directory_for(multiply_job_name)
    checkpointed_multiply_file = multiple_dir / "___ckpt"
    checkpointed_multiply_file.touch()
    multiply_output_file.touch()

    run_python_on_parameters(
        multiply_job_name, multiply_by_x_main, multiply_params, depends_on=[]
    )

    monkeypatch.setattr("builtins.input", lambda _: "n")
    write_workflow_description()

    assert checkpointed_multiply_file.exists()

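# The entry points scheduled by these tests (multiply_by_x_main, sort_nums_main) live in
# a separate scripts module that is not part of this listing. A minimal sketch of what
# such parameters-driven entry points could look like follows; the exact signatures and
# parameter accessors are assumptions based on how the tests call them, not the
# project's actual scripts.
def multiply_by_x_main(params: Parameters) -> None:
    # Multiply every number in input_file by x and write the results to output_file.
    input_file = params.existing_file("input_file")
    output_file = params.creatable_file("output_file")
    x = params.integer("x")
    nums = [int(line) for line in input_file.read_text().splitlines() if line.strip()]
    output_file.write_text("".join(f"{num * x}\n" for num in nums))


def sort_nums_main(params: Parameters) -> None:
    # Sort the numbers in input_file and write them to output_file.
    input_file = params.existing_file("input_file")
    output_file = params.creatable_file("output_file")
    nums = sorted(int(line) for line in input_file.read_text().splitlines() if line.strip())
    output_file.write_text("".join(f"{num}\n" for num in nums))
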
def test_composed_key_value_transform(tmp_path):
    kvs = {"doc1": 5, "doc2": 10}

    def add1(values, **kwargs):  # pylint:disable=unused-argument
        return {key: val + 1 for key, val in values.items()}

    def subtract2(values, **kwargs):  # pylint:disable=unused-argument
        return {key: val - 2 for key, val in values.items()}

    composed_transforms = compose_key_value_store_transforms(transforms=[add1, subtract2])

    params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "home_dir": str(tmp_path),
        }
    )
    initialize_vista_pegasus_wrapper(params)

    transformed_kvs = transform_key_value_store(
        kvs, composed_transforms, output_locator=Locator([]), parallelism=1
    )
    expected_kvs = {"doc1": 4, "doc2": 9}
    assert expected_kvs == transformed_kvs

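# compose_key_value_store_transforms applies the given transforms in order, so
# [add1, subtract2] maps 5 -> 6 -> 4 and 10 -> 11 -> 9, which is what the test above
# asserts. A minimal stand-in illustrating that left-to-right composition (not the
# wrapper's actual implementation):
from functools import reduce


def compose_transforms_sketch(transforms):
    def composed(values, **kwargs):
        return reduce(lambda acc, transform: transform(acc, **kwargs), transforms, values)

    return composed


# e.g. compose_transforms_sketch([add1, subtract2])({"doc1": 5}) == {"doc1": 4}
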
def example_workflow(params: Parameters):  # pragma: no cover
    """
    An example script to generate a container workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")
    docker_tar = params.creatable_file("docker_tar")
    docker_build_dir = params.existing_directory("docker_build_dir")
    docker_image_name = params.string(
        "docker_image_name", default="pegasus_wrapper_container_demo"
    )
    docker_image_tag = params.string("docker_image_tag", default="0.2")
    mongo_db_tar = params.string(
        "mongo_db_tar", default="/nas/gaia/shared/cluster/docker/mongo-4.4.tar"
    )
    mongo_db_data = "/scratch/dockermount/pegasus_wrapper_tmp/data"
    mongo_db_config = "/scratch/dockermount/pegasus_wrapper_tmp/config"

    # Generating parameters for initializing a workflow
    # We recommend making workflow directory, site, and partition parameters
    # in a research workflow
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "home_dir": str(tmp_path),
            "partition": "scavenge",
        }
    )

    saga31_request = SlurmResourceRequest.from_parameters(
        Parameters.from_mapping({"run_on_single_node": "saga31", "partition": "gaia"})
    )

    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    input_file = tmp_path / "raw_nums.txt"
    add_y_output_file_nas = tmp_path / "nums_y.txt"
    sorted_output_file_nas = tmp_path / "sorted.txt"
    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]

    # Base Job Locator
    job_locator = Locator(("jobs",))
    docker_python_root = Path("/home/app/")

    job_profile = PegasusProfile(
        namespace="pegasus", key="transfer.bypass.input.staging", value="True"
    )

    # Write a list of numbers out to be able to run the workflow
    with input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    build_container = run_bash(
        job_locator / "build_docker",
        command=[
            "mkdir -p /scratch/dockermount/pegasus_wrapper_tmp",
            f"cd {docker_build_dir}",
            f"docker build . -t {docker_image_name}:{docker_image_tag}",
            f"docker save -o /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_image_name}:{docker_image_tag}",
            f"cp /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_tar.absolute()}",
            f"chmod go+r {docker_tar.absolute()}",
            f"docker load --input {mongo_db_tar}",
            f"mkdir -p {mongo_db_data}",
            f"mkdir -p {mongo_db_config}",
        ],
        depends_on=[],
        resource_request=saga31_request,
    )

    python36 = add_container(
        f"{docker_image_name}:{docker_image_tag}",
        "docker",
        str(docker_tar.absolute()),
        image_site="saga",
        bypass_staging=True,
    )
    mongo4_4 = add_container(
        "mongo:4.4", "docker", mongo_db_tar, image_site="saga", bypass_staging=True
    )

    start_mongo = start_docker_as_service(
        mongo4_4,
        depends_on=[build_container],
        mounts=[f"{mongo_db_data}:/data/db", f"{mongo_db_config}:/etc/custom"],
        docker_args="-p 27017:27017",
        resource_request=saga31_request,
    )

    add_y_job = run_python_on_args(
        job_locator / "add",
        docker_python_root / "add_y.py",
        set_args=f"{input_file} {add_y_output_file_nas} --y 10",
        depends_on=[build_container],
        job_profiles=[job_profile],
        resource_request=saga31_request,
        container=python36,
        input_file_paths=[input_file],
        output_file_paths=[add_y_output_file_nas],
    )

    sort_job = run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
        depends_on=[add_y_job],
        container=python36,
        job_profiles=[job_profile],
        resource_request=saga31_request,
        input_file_paths=add_y_output_file_nas,
        output_file_paths=sorted_output_file_nas,
    )

    _ = stop_docker_as_service(
        mongo4_4, depends_on=[start_mongo, sort_job], resource_request=saga31_request
    )

    # Generate the Pegasus DAX file & a Submit Script
    write_workflow_description(tmp_path)

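# The workflow above runs /home/app/add_y.py inside the container with the arguments
# "<input> <output> --y 10". That script ships with the demo image rather than with
# this listing; the argparse-based sketch below is merely a plausible reading of that
# command line (hypothetical name and contents, not the image's actual script).
import argparse


def add_y_script_main() -> None:
    # Add a constant --y to each number in input_file and write the results to output_file.
    parser = argparse.ArgumentParser(description="Add a constant to each number in a file.")
    parser.add_argument("input_file", type=Path)
    parser.add_argument("output_file", type=Path)
    parser.add_argument("--y", type=int, default=0)
    args = parser.parse_args()
    nums = [int(line) for line in args.input_file.read_text().splitlines() if line.strip()]
    args.output_file.write_text("".join(f"{num + args.y}\n" for num in nums))
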
def example_workflow(params: Parameters):
    """
    An example script to generate a workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")

    # Generating parameters for initializing a workflow
    # We recommend making workflow directory, site, and partition parameters
    # in a research workflow
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
        }
    )
    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"

    # Base Job Locator
    job_locator = Locator(("jobs",))

    # Write a list of numbers out to be able to run the workflow
    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            job_locator / "multiply",
            multiply_by_x,
            {
                "input_file": multiply_input_file,
                "output_file": multiply_output_file,
                "x": 4,
                "logfile": str(tmp_path / "multiply_log.txt"),
            },
            depends_on=[],
        ),
        locator=Locator("multiply"),
    )
    run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {"input_file": multiply_output_file, "output_file": sorted_output_file},
        depends_on=[multiply_artifact],
        # If you want to use a different resource request for some task, you can do it like this:
        # resource_request=SlurmResourceRequest.from_parameters(slurm_params),
    )

    # Generate the Pegasus DAX file
    dax_file = write_workflow_description(tmp_path)

    submit_script = tmp_path / "submit_script.sh"

    # Our attempt at an easy submit file. It MAY NOT be accurate for more complicated
    # workflows, but it does work for this simple example.
    # See https://github.com/isi-vista/vista-pegasus-wrapper/issues/27
    build_submit_script(
        submit_script,
        str(dax_file),
        experiment_directory(),  # pylint:disable=protected-access
    )

def example_workflow(params: Parameters):
    """
    An example script to generate a workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")

    # Generating parameters for initializing a workflow
    # We recommend making workflow directory, site, and partition parameters
    # in a research workflow
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "home_dir": str(tmp_path),
            "partition": "scavenge",
        }
    )
    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"
    add_output_file = tmp_path / "add_nums.txt"

    # Base Job Locator
    job_locator = Locator(("jobs",))

    # Write a list of numbers out to be able to run the workflow
    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            job_locator / "multiply",
            multiply_by_x,
            {
                "input_file": multiply_input_file,
                "output_file": multiply_output_file,
                "x": 4,
                "logfile": str(tmp_path / "multiply_log.txt"),
            },
            depends_on=[],
        ),
        locator=Locator("multiply"),
    )

    # You can also just track the dependency node itself to pass to a future job
    # if you don't need the value portion of an artifact
    mul_dep = run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {"input_file": multiply_output_file, "output_file": sorted_output_file},
        depends_on=[multiply_artifact],
        # If you want to use a different resource request for some task, you can do it like this:
        # resource_request=SlurmResourceRequest.from_parameters(slurm_params),
    )

    run_python_on_args(
        job_locator / "add",
        add_y,
        set_args=f"{sorted_output_file} {add_output_file} --y 10",
        depends_on=[mul_dep],
        category="add",  # Can be used as a custom category for job limits
    )

    # If you want to limit the number of active jobs in a category, use the following:
    # limit_jobs_for_category("scavenge", 1)

    # Generate the Pegasus DAX file & a Submit Script
    write_workflow_description(tmp_path)

def test_dax_with_python_into_container_jobs(tmp_path):
    docker_tar = Path(f"{tmp_path}/docker/tar.tar")
    docker_build_dir = tmp_path
    docker_image_name = "pegasus_wrapper_container_demo"
    docker_image_tag = "0.2"

    # Generating parameters for initializing a workflow
    # We recommend making workflow directory, site, and partition parameters
    # in a research workflow
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "home_dir": str(tmp_path),
            "partition": "scavenge",
        }
    )

    saga31_request = SlurmResourceRequest.from_parameters(
        Parameters.from_mapping({"run_on_single_node": "saga31", "partition": "gaia"})
    )

    # Our source input for the sample jobs
    input_file = tmp_path / "raw_nums.txt"
    add_y_output_file_nas = tmp_path / "nums_y.txt"
    sorted_output_file_nas = tmp_path / "sorted.txt"
    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]

    # Base Job Locator
    job_locator = Locator(("jobs",))
    docker_python_root = Path("/home/app/")

    # Write a list of numbers out to be able to run the workflow
    with input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    build_container_locator = job_locator / "build_docker"
    build_container = run_bash(
        build_container_locator,
        command=[
            "mkdir -p /scratch/dockermount/pegasus_wrapper_tmp",
            f"cd {docker_build_dir}",
            f"docker build . -t {docker_image_name}:{docker_image_tag}",
            f"docker save -o /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_image_name}:{docker_image_tag}",
            f"cp /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_tar.absolute()}",
            f"chmod go+r {docker_tar.absolute()}",
        ],
        depends_on=[],
        resource_request=saga31_request,
    )
    build_container_dir = directory_for(build_container_locator)
    assert (build_container_dir / "script.sh").exists()

    python36 = add_container(
        f"{docker_image_name}:{docker_image_tag}",
        "docker",
        str(docker_tar.absolute()),
        image_site="saga",
        bypass_staging=True,
    )
    job_profile = PegasusProfile(
        namespace="pegasus", key="transfer.bypass.input.staging", value="True"
    )

    mongo4_4 = add_container(
        "mongo:4.4", "docker", "path/to/tar.tar", image_site="saga", bypass_staging=True
    )

    with pytest.raises(RuntimeError):
        _ = stop_docker_as_service(
            mongo4_4, depends_on=[], resource_request=saga31_request
        )

    start_mongo = start_docker_as_service(
        mongo4_4,
        depends_on=[build_container],
        docker_args="-v /scratch/mongo/data/db:/data/db",
        resource_request=saga31_request,
    )
    mongo4_4_dir = directory_for(Locator(("containers", mongo4_4.name)))
    assert (mongo4_4_dir / "start.sh").exists()
    assert (mongo4_4_dir / "stop.sh").exists()

    add_y_locator = job_locator / "add"
    add_y_job = run_python_on_args(
        add_y_locator,
        docker_python_root / "add_y.py",
        set_args=f"{input_file} {add_y_output_file_nas} --y 10",
        depends_on=[build_container],
        job_profiles=[job_profile],
        resource_request=saga31_request,
        container=python36,
        input_file_paths=[input_file],
        output_file_paths=[add_y_output_file_nas],
    )
    add_y_dir = directory_for(add_y_locator)
    assert (add_y_dir / "___run.sh").exists()

    with pytest.raises(RuntimeError):
        _ = run_python_on_args(
            add_y_locator,
            docker_python_root / "add_y.py",
            set_args=f"{input_file} {add_y_output_file_nas} --y 10",
            depends_on=[build_container],
            job_profiles=[job_profile],
            resource_request=saga31_request,
            container=python36,
            input_file_paths=[input_file, input_file],
            output_file_paths=[add_y_output_file_nas],
        )

    sort_job_locator = job_locator / "sort"
    sort_job = run_python_on_parameters(
        sort_job_locator,
        sort_nums_main,
        {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
        depends_on=[add_y_job],
        container=python36,
        job_profiles=[job_profile],
        resource_request=saga31_request,
        input_file_paths=add_y_output_file_nas,
        output_file_paths=sorted_output_file_nas,
    )
    assert sort_job == run_python_on_parameters(
        sort_job_locator,
        sort_nums_main,
        {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
        depends_on=[add_y_job],
        container=python36,
        job_profiles=[job_profile],
        resource_request=saga31_request,
        input_file_paths=add_y_output_file_nas,
        output_file_paths=sorted_output_file_nas,
    )
    sort_job_dir = directory_for(sort_job_locator)
    assert (sort_job_dir / "___run.sh").exists()
    assert (sort_job_dir / "____params.params").exists()

    with pytest.raises(RuntimeError):
        _ = run_python_on_parameters(
            sort_job_locator,
            sort_nums_main,
            {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
            depends_on=[add_y_job],
            container=python36,
            job_profiles=[job_profile],
            resource_request=saga31_request,
            input_file_paths=add_y_output_file_nas,
            output_file_paths=[sorted_output_file_nas, sorted_output_file_nas],
        )

    celebration_bash_locator = job_locator / "celebrate"
    celebration_bash = run_bash(
        celebration_bash_locator,
        'echo "Jobs Runs Successfully"',
        depends_on=[sort_job],
        job_profiles=[job_profile],
    )
    assert celebration_bash == run_bash(
        celebration_bash_locator,
        'echo "Jobs Runs Successfully"',
        depends_on=[sort_job],
        job_profiles=[job_profile],
    )
    celebration_bash_dir = directory_for(celebration_bash_locator)
    assert (celebration_bash_dir / "script.sh").exists()

    _ = stop_docker_as_service(
        mongo4_4, depends_on=[start_mongo, sort_job], resource_request=saga31_request
    )

    # Generate the Pegasus DAX file & a Submit Script
    dax_file_one = write_workflow_description(tmp_path)
    assert dax_file_one.exists()

    submit_script_one = tmp_path / "submit.sh"
    assert submit_script_one.exists()

def test_dax_with_job_on_saga_with_dict_as_params(tmp_path):
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "experiment_name": "fred",
            "home_dir": str(tmp_path),
        }
    )
    slurm_params = Parameters.from_mapping(
        {"partition": "gaia", "num_cpus": 1, "num_gpus": 0, "memory": "4G"}
    )
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = immutableset(int(random.random() * 100) for _ in range(25))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"
    add_output_file = tmp_path / "add_nums.txt"

    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    multiply_params = {
        "input_file": multiply_input_file,
        "output_file": multiply_output_file,
        "x": 4,
    }
    sort_params = {"input_file": multiply_output_file, "output_file": sorted_output_file}
    add_args = f"{sorted_output_file} {add_output_file} --y 10"

    job_profile = PegasusProfile(
        namespace="pegasus", key="transfer.bypass.input.staging", value="True"
    )
    resources = SlurmResourceRequest.from_parameters(slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            job_profiles=[job_profile],
        ),
        locator=Locator("multiply"),
    )
    multiple_dir = directory_for(multiply_job_name)
    assert (multiple_dir / "___run.sh").exists()
    assert (multiple_dir / "____params.params").exists()

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sort_dir = directory_for(sort_job_name)
    sort_artifact = run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
        category="add",
    )
    assert (sort_dir / "___run.sh").exists()
    assert (sort_dir / "____params.params").exists()

    add_job_name = Locator(_parse_parts("jobs/add"))
    add_dir = directory_for(add_job_name)
    run_python_on_args(add_job_name, "add_job_main.py", add_args, depends_on=[sort_artifact])
    assert (add_dir / "___run.sh").exists()

    dax_file_one = write_workflow_description(tmp_path)
    dax_file_two = write_workflow_description()

    assert dax_file_one.exists()
    assert dax_file_two.exists()

    submit_script_one = tmp_path / "submit_script_one.sh"
    submit_script_two = tmp_path / "submit_script_two.sh"
    build_submit_script(submit_script_one, str(dax_file_one), experiment_directory())
    build_submit_script(submit_script_two, str(dax_file_two), experiment_directory())

    assert submit_script_one.exists()
    assert submit_script_two.exists()

    site_catalog = workflow_params.existing_directory("workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = workflow_params.existing_directory("workflow_directory") / "replicas.yml"
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") / "transformations.yml"
    )
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") / "pegasus.properties"
    )
    assert properties_file.exists()

def test_dax_with_job_in_container(tmp_path):
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "experiment_name": "fred",
            "home_dir": str(tmp_path),
        }
    )
    slurm_params = Parameters.from_mapping(
        {"partition": "gaia", "num_cpus": 1, "num_gpus": 0, "memory": "4G"}
    )
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = immutableset(int(random.random() * 100) for _ in range(25))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"

    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    multiply_params = Parameters.from_mapping(
        {"input_file": multiply_input_file, "output_file": multiply_output_file, "x": 4}
    )
    sort_params = Parameters.from_mapping(
        {"input_file": multiply_output_file, "output_file": sorted_output_file}
    )
    resources = SlurmResourceRequest.from_parameters(slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    # Add Container
    example_docker = add_container("example_container", "docker", tmp_path / "docker.img")

    with pytest.raises(ValueError):
        _ = add_container("fake_container", "invalid", tmp_path / "invalid_docker.img")

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            container=example_docker,
        ),
        locator=Locator("multiply"),
    )
    multiple_dir = directory_for(multiply_job_name)
    assert (multiple_dir / "___run.sh").exists()
    assert (multiple_dir / "____params.params").exists()

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sort_dir = directory_for(sort_job_name)
    run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
        container=example_docker,
    )
    assert (sort_dir / "___run.sh").exists()
    assert (sort_dir / "____params.params").exists()

    dax_file_one = write_workflow_description()
    assert dax_file_one.exists()

    site_catalog = workflow_params.existing_directory("workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = workflow_params.existing_directory("workflow_directory") / "replicas.yml"
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") / "transformations.yml"
    )
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") / "pegasus.properties"
    )
    assert properties_file.exists()

def object_language_ablation_runner_entry_point(params: Parameters) -> None:
    """This function creates all possible object language ablation param files within a given range"""
    initialize_vista_pegasus_wrapper(params)

    baseline_parameters = params.namespace("object_language_ablation")
    pursuit_resource_request_params = params.namespace("pursuit_resource_request")

    # get the minimum and maximum number of objects in a scene
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)

    # get the minimum and maximum accuracy of the language with the situation
    min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    num_language_accuracy_increment = params.integer(
        "num_language_accuracy_increment", default=5
    )
    values_for_accuracy = np.linspace(
        min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    )

    limit_jobs_for_category(
        "pursuit", params.integer("num_pursuit_learners_active", default=8)
    )

    for num_objects in range(min_num_objects, max_num_objects + 1):
        for language_accuracy in values_for_accuracy:
            for learner_type in LEARNER_VALUES_TO_PARAMS:
                for params_str, learner_params in LEARNER_VALUES_TO_PARAMS[learner_type]:
                    experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
                        num_objects=num_objects,
                        language_accuracy=language_accuracy,
                        learner_type=learner_type,
                        learner_params=params_str,
                    )
                    experiment_name = Locator(experiment_name_string.split("-"))

                    # Note that the input parameters should include the root params and
                    # anything else we want.
                    experiment_params = baseline_parameters.unify(FIXED_PARAMETERS).unify(
                        {
                            "experiment": experiment_name_string,
                            "experiment_group_dir": directory_for(experiment_name),
                            "hypothesis_log_dir": directory_for(experiment_name)
                            / "hypotheses",
                            "learner_logging_path": directory_for(experiment_name),
                            "log_learner_state": True,
                            "resume_from_latest_logged_state": True,
                            "train_curriculum": {
                                "accurate_language_percentage": float(language_accuracy)
                            },
                            "object_learner_type": learner_type,
                            "object_learner": learner_params,
                            # We subtract one because the target object is a given
                            "num_noise_objects": num_objects - 1,
                        }
                    )

                    run_python_on_parameters(
                        experiment_name,
                        log_experiment_script,
                        experiment_params,
                        depends_on=[],
                        resource_request=SlurmResourceRequest.from_parameters(
                            pursuit_resource_request_params
                        )
                        if learner_type == "pursuit"
                        else None,
                        category=learner_type,
                    )

    write_workflow_description()

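# EXPERIMENT_NAME_FORMAT, LEARNER_VALUES_TO_PARAMS, and FIXED_PARAMETERS are module-level
# constants defined elsewhere in this runner and are not shown in this listing. The
# stand-ins below (note the _SKETCH suffix) only illustrate the shapes the loop above
# relies on; the real values differ, so treat these purely as documentation.
EXPERIMENT_NAME_FORMAT_SKETCH = (
    "object_language_ablation-{num_objects}_objects"
    "-{language_accuracy}_accuracy-{learner_type}-{learner_params}"
)

# learner type -> iterable of (human-readable params label, learner params mapping)
LEARNER_VALUES_TO_PARAMS_SKETCH = {
    "pursuit": [("pursuit_default", {"learner_type": "pursuit"})],
    "subset": [("subset_default", {"learner_type": "subset"})],
}

# shared settings unified into every experiment's parameters
FIXED_PARAMETERS_SKETCH = {"sort_learner_descriptions_by_length": True}
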
def test_category_max_jobs(tmp_path):
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "home_dir": str(tmp_path),
        }
    )
    multiply_slurm_params = Parameters.from_mapping(
        {"partition": "gaia", "num_cpus": 1, "num_gpus": 0, "memory": "4G"}
    )
    multiply_resources = SlurmResourceRequest.from_parameters(multiply_slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping(
        {"input_file": multiply_input_file, "output_file": multiply_output_file, "x": 4}
    )
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            resource_request=multiply_resources,
        ),
        locator=Locator("multiply"),
    )

    sort_slurm_params = Parameters.from_mapping(
        {
            "partition": "ephemeral",
            "num_cpus": 1,
            "num_gpus": 0,
            "memory": "4G",
            "job_time_in_minutes": 120,
        }
    )
    sort_resources = SlurmResourceRequest.from_parameters(sort_slurm_params)

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping(
        {"input_file": multiply_output_file, "output_file": sorted_output_file}
    )
    run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=sort_resources,
    )
    limit_jobs_for_category("gaia", 1)
    write_workflow_description()

    site_catalog = workflow_params.existing_directory("workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = workflow_params.existing_directory("workflow_directory") / "replicas.yml"
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") / "transformations.yml"
    )
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") / "pegasus.properties"
    )
    assert properties_file.exists()

    # Make sure the config contains the appropriate maxjobs lines and no inappropriate maxjobs lines
    with properties_file.open("r") as f:
        lines = f.readlines()
        for line in lines:
            print(line)
        assert any("dagman.gaia.maxjobs = 1" in line for line in lines)
        assert all("dagman.ephemeral.maxjobs =" not in line for line in lines)

def test_dax_with_saga_categories(tmp_path):
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "home_dir": str(tmp_path),
        }
    )
    multiply_partition = "gaia"
    multiply_slurm_params = Parameters.from_mapping(
        {"partition": multiply_partition, "num_cpus": 1, "num_gpus": 0, "memory": "4G"}
    )
    multiply_resources = SlurmResourceRequest.from_parameters(multiply_slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping(
        {"input_file": multiply_input_file, "output_file": multiply_output_file, "x": 4}
    )
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name,
            multiply_by_x_main,
            multiply_params,
            depends_on=[],
            resource_request=multiply_resources,
        ),
        locator=Locator("multiply"),
    )

    sort_partition = "lestat"
    sort_slurm_params = Parameters.from_mapping(
        {"partition": sort_partition, "num_cpus": 1, "num_gpus": 0, "memory": "4G"}
    )
    sort_resources = SlurmResourceRequest.from_parameters(sort_slurm_params)

    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping(
        {"input_file": multiply_output_file, "output_file": sorted_output_file}
    )
    run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=sort_resources,
    )

    dax_file = write_workflow_description()
    assert dax_file.exists()

    # Check that the multiply and sort jobs have the appropriate partition-defined categories set in
    # the DAX file
    assert _job_in_dax_has_category(dax_file, multiply_job_name, multiply_partition)
    assert not _job_in_dax_has_category(dax_file, multiply_job_name, sort_partition)
    assert _job_in_dax_has_category(dax_file, sort_job_name, sort_partition)
    assert not _job_in_dax_has_category(dax_file, sort_job_name, multiply_partition)

def test_dax_with_checkpointed_jobs_on_saga(tmp_path):
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "home_dir": str(tmp_path),
        }
    )
    slurm_params = Parameters.from_mapping(
        {"partition": "gaia", "num_cpus": 1, "num_gpus": 0, "memory": "4G"}
    )
    resources = SlurmResourceRequest.from_parameters(slurm_params)
    initialize_vista_pegasus_wrapper(workflow_params)

    multiply_job_name = Locator(_parse_parts("jobs/multiply"))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    multiply_input_file = tmp_path / "raw_nums.txt"
    multiply_params = Parameters.from_mapping(
        {"input_file": multiply_input_file, "output_file": multiply_output_file, "x": 4}
    )
    multiple_dir = directory_for(multiply_job_name)

    # Create checkpointed file so that when trying to create the job again,
    # Pegasus just adds the file to the Replica Catalog
    checkpointed_multiply_file = multiple_dir / "___ckpt"
    checkpointed_multiply_file.touch()
    multiply_output_file.touch()
    assert checkpointed_multiply_file.exists()
    assert multiply_output_file.exists()

    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_name, multiply_by_x_main, multiply_params, depends_on=[]
        ),
        locator=Locator("multiply"),
    )
    sort_job_name = Locator(_parse_parts("jobs/sort"))
    sorted_output_file = tmp_path / "sorted_nums.txt"
    sort_params = Parameters.from_mapping(
        {"input_file": multiply_output_file, "output_file": sorted_output_file}
    )
    run_python_on_parameters(
        sort_job_name,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
    )
    write_workflow_description()

    site_catalog = workflow_params.existing_directory("workflow_directory") / "sites.yml"
    assert site_catalog.exists()

    replica_catalog = workflow_params.existing_directory("workflow_directory") / "replicas.yml"
    assert replica_catalog.exists()

    transformations_catalog = (
        workflow_params.existing_directory("workflow_directory") / "transformations.yml"
    )
    assert transformations_catalog.exists()

    properties_file = (
        workflow_params.existing_directory("workflow_directory") / "pegasus.properties"
    )
    assert properties_file.exists()

    # Make sure the Replica Catalog is not empty
    assert replica_catalog.stat().st_size > 0

def integrated_experiment_entry_point(params: Parameters) -> None:
    initialize_vista_pegasus_wrapper(params)

    baseline_parameters = params.namespace("integrated_learners_experiment")
    pursuit_resource_request_params = params.namespace("pursuit_resource_request")

    # This code is commented out but may be used in the near future to add language
    # ablation capabilities to this curriculum.

    # get the minimum and maximum accuracy of the language with the situation
    # min_language_accuracy = params.floating_point("min_language_accuracy", default=0.1)
    # max_language_accuracy = params.floating_point("max_language_accuracy", default=0.5)
    # num_language_accuracy_increment = params.integer(
    #     "num_language_accuracy_increment", default=5
    # )
    # values_for_accuracy = np.linspace(
    #     min_language_accuracy, max_language_accuracy, num_language_accuracy_increment
    # )

    # Get if attributes or relations should be included
    include_attributes = params.boolean("include_attributes", default=True)
    include_relations = params.boolean("include_relations", default=True)

    limit_jobs_for_category(
        "pursuit_job_limit", params.integer("num_pursuit_learners_active", default=8)
    )

    curriculum_repository_path = params.creatable_directory("curriculum_repository_path")

    # Job to build desired curriculum(s) which our learners use
    curriculum_dependencies = immutableset(
        (
            CURRICULUM_NAME_FORMAT.format(
                noise=add_noise,
                shuffled=shuffle,
                relations=include_relations,
                attributes=include_attributes,
            ),
            run_python_on_parameters(
                Locator(
                    CURRICULUM_NAME_FORMAT.format(
                        noise=add_noise,
                        shuffled=shuffle,
                        relations=include_relations,
                        attributes=include_attributes,
                    ).split("-")
                ),
                generate_curriculum_script,
                baseline_parameters.unify(
                    {
                        "train_curriculum": Parameters.from_mapping(CURRICULUM_PARAMS)
                        .unify(
                            {
                                "add_noise": add_noise,
                                "shuffled": shuffle,
                                "include_attributes": include_attributes,
                                "include_relations": include_relations,
                            }
                        )
                        .as_mapping()
                    }
                )
                .unify(FIXED_PARAMETERS)
                .unify({"curriculum_repository_path": curriculum_repository_path}),
                depends_on=[],
            ),
            Parameters.from_mapping(CURRICULUM_PARAMS).unify(
                {
                    "add_noise": add_noise,
                    "shuffled": shuffle,
                    "include_attributes": include_attributes,
                    "include_relations": include_relations,
                }
            ),
        )
        for add_noise in (True, False)
        for shuffle in (True, False)
    )

    # jobs to build experiment
    for (curriculum_str, curriculum_dep, curr_params) in curriculum_dependencies:
        object_learner_type = params.string(
            "object_learner.learner_type",
            valid_options=["pursuit", "subset", "pbv"],
            default="pursuit",
        )
        attribute_learner_type = params.string(
            "attribute_learner.learner_type",
            valid_options=["none", "pursuit", "subset"],
            default="pursuit",
        )
        relation_learner_type = params.string(
            "relation_learner.learner_type",
            valid_options=["none", "pursuit", "subset"],
            default="pursuit",
        )

        experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
            curriculum_name=curriculum_str.replace("-", "+"),
            object_learner=object_learner_type,
            attribute_learner=attribute_learner_type,
            relation_learner=relation_learner_type,
        )
        experiment_name = Locator(experiment_name_string.split("-"))

        # Note that the input parameters should include the root params and
        # anything else we want.
        experiment_params = baseline_parameters.unify(FIXED_PARAMETERS).unify(
            {
                "experiment": experiment_name_string,
                "experiment_group_dir": directory_for(experiment_name),
                "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
                "learner_logging_path": directory_for(experiment_name),
                "log_learner_state": True,
                "resume_from_latest_logged_state": True,
                "load_from_curriculum_repository": curriculum_repository_path,
                "train_curriculum": curr_params,
            }
        )

        run_python_on_parameters(
            experiment_name,
            log_experiment_script,
            experiment_params,
            depends_on=[curriculum_dep],
            resource_request=SlurmResourceRequest.from_parameters(
                pursuit_resource_request_params
            )
            if "pursuit"
            in [object_learner_type, attribute_learner_type, relation_learner_type]
            else None,
            category="pursuit"
            if "pursuit"
            in [object_learner_type, attribute_learner_type, relation_learner_type]
            else "subset",
            use_pypy=True,
        )

    write_workflow_description()

def main(params: Parameters):
    adam_root = params.existing_directory("adam_root")
    m13_experiments_dir = adam_root / "parameters" / "experiments" / "m13"
    use_pegasus = params.boolean("use_pegasus", default=False)
    if use_pegasus:
        initialize_vista_pegasus_wrapper(params)

    param_files: List[Path] = []

    if params.boolean("include_objects", default=True):
        param_files.append(m13_experiments_dir / "objects.params")

    if params.boolean("include_imprecise_size", default=True):
        param_files.append(m13_experiments_dir / "imprecise_size.params")

    if params.boolean("include_imprecise_temporal", default=True):
        param_files.append(m13_experiments_dir / "imprecise_temporal.params")

    if params.boolean("include_subtle_verb", default=True):
        param_files.append(m13_experiments_dir / "subtle_verb.params")

    if params.boolean("include_object_restrictions", default=True):
        param_files.append(m13_experiments_dir / "object_restrictions.params")

    if params.boolean("include_functionally_defined_objects", default=True):
        param_files.append(m13_experiments_dir / "functionally_defined_objects.params")

    if params.boolean("include_relations", default=True):
        param_files.append(m13_experiments_dir / "relations.params")

    if params.boolean("include_generics", default=True):
        param_files.append(m13_experiments_dir / "generics.params")

    if params.boolean("include_verbs_with_dynamic_prepositions", default=True):
        param_files.append(m13_experiments_dir / "events_with_dynamic_prepositions.params")

    if params.boolean("include_m9_complete", default=False):
        param_files.append(m13_experiments_dir / "m9_complete.params")

    if params.boolean("include_m13_complete", default=False):
        param_files.append(m13_experiments_dir / "m13_complete.params")

    if params.boolean("include_m13_shuffled", default=False):
        param_files.append(m13_experiments_dir / "m13_shuffled.params")

    # This activates a special "debug" curriculum,
    # which is meant to be edited in the code by a developer to do fine-grained debugging.
    if params.boolean("include_debug", default=False):
        param_files.append(m13_experiments_dir / "debug.params")

    # If any of the param files don't exist, bail out earlier instead of making the user
    # wait for the error.
    for param_file in param_files:
        if not param_file.exists():
            raise RuntimeError(f"Expected param file {param_file} does not exist")

    for param_file in param_files:
        logging.info("Running %s", param_file)
        experiment_params = YAMLParametersLoader().load(param_file)
        if not use_pegasus:
            log_experiment_entry_point(experiment_params)
        else:
            experiment_name = Locator(experiment_params.string("experiment"))
            experiment_params = experiment_params.unify(
                {
                    "experiment_group_dir": directory_for(experiment_name) / "output",
                    "hypothesis_log_dir": directory_for(experiment_name) / "hypotheses",
                    # State pickles will go under experiment_name/learner_state
                    "learner_logging_path": directory_for(experiment_name),
                    "log_learner_state": True,
                    "resume_from_latest_logged_state": True,
                    "log_hypothesis_every_n_steps": params.integer(
                        "save_state_every_n_steps"
                    ),
                    "debug_learner_pickling": params.boolean(
                        "debug_learner_pickling", default=False
                    ),
                }
            )
            run_python_on_parameters(
                experiment_name, log_experiment_script, experiment_params, depends_on=[]
            )

    if use_pegasus:
        write_workflow_description()

def gaze_ablation_runner_entry_point(params: Parameters) -> None:
    """This function creates all possible gaze ablation param files within a given range"""
    initialize_vista_pegasus_wrapper(params)

    # Get the baseline experiment parameters for gaze ablation -- these are things common to all of
    # the experiments, like:
    #
    # include_image_links: true
    # sort_learner_descriptions_by_length: True
    # num_pretty_descriptions: 5
    baseline_parameters = params.namespace("gaze_ablation")

    # get the minimum and maximum number of objects in a scene
    min_num_objects = params.integer("min_num_objects", default=1)
    max_num_objects = params.integer("max_num_objects", default=7)

    # this gets the number of different accuracies to try; default = increment by 0.1
    num_accuracy_increments = params.integer("num_increments", default=11)
    values_for_accuracy = np.linspace(0, 1, num_accuracy_increments)

    # the number of noise instances to be included
    min_num_noise_instances = params.integer("min_num_noise", default=0)
    max_num_noise_instances = params.integer("max_num_noise", default=0)

    # get the number of instances in the entire curriculum
    min_num_instances_in_curriculum = params.integer("min_instances", default=10)
    max_num_instances_in_curriculum = params.integer("max_instances", default=20)

    # all possible numbers of noise instances
    for num_noise_instances in range(min_num_noise_instances, max_num_noise_instances + 1):
        # all possible numbers of instances in the curriculum
        for num_instances in range(
            min_num_instances_in_curriculum, max_num_instances_in_curriculum + 1
        ):
            # all possible numbers of instances
            for num_objects_in_instance in range(min_num_objects, max_num_objects + 1):
                # all possible accuracies
                for prob_given in values_for_accuracy:
                    for prob_not_given in values_for_accuracy:
                        # both ignoring and perceiving gaze
                        for add_gaze in [True, False]:
                            # Define the experiment name, which is used both as a job name and to
                            # choose a directory in which to store the experiment results.
                            experiment_name_string = EXPERIMENT_NAME_FORMAT.format(
                                num_instances=num_instances,
                                num_noise_instances=num_noise_instances,
                                num_objects_in_instance=num_objects_in_instance,
                                prob_given=prob_given,
                                prob_not_given=prob_not_given,
                                add_gaze=add_gaze,
                            )
                            experiment_name = Locator(experiment_name_string.split("-"))

                            # Note that the input parameters should include the root params and
                            # anything else we want.
                            experiment_params = baseline_parameters.unify(
                                FIXED_PARAMETERS
                            ).unify(
                                {
                                    "experiment": experiment_name_string,
                                    "experiment_group_dir": directory_for(experiment_name),
                                    "hypothesis_log_dir": directory_for(experiment_name)
                                    / "hypotheses",
                                    "learner_logging_path": directory_for(experiment_name),
                                    "log_learner_state": True,
                                    "resume_from_latest_logged_state": True,
                                    "pursuit-curriculum-params": {
                                        "num_instances": num_instances,
                                        "num_noise_instances": num_noise_instances,
                                        "num_objects_in_instance": num_objects_in_instance,
                                        "add_gaze": add_gaze,
                                        "prob_given": float(prob_given),
                                        "prob_not_given": float(prob_not_given),
                                    },
                                }
                            )
                            run_python_on_parameters(
                                experiment_name,
                                log_experiment_script,
                                experiment_params,
                                depends_on=[],
                            )
    write_workflow_description()

def test_dax_test_exclude_nodes_on_saga(tmp_path):
    sample_exclude = "saga01,saga03,saga21,saga05"
    sample_include = "saga06"
    params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "partition": "gaia",
            "exclude_list": sample_exclude,
            "home_dir": str(tmp_path),
        }
    )
    slurm_params = Parameters.from_mapping(
        {"partition": "gaia", "num_cpus": 1, "num_gpus": 0, "memory": "4G"}
    )
    multiply_input_file = tmp_path / "raw_nums.txt"
    random = Random()
    random.seed(0)
    nums = immutableset(int(random.random() * 100) for _ in range(0, 25))
    multiply_output_file = tmp_path / "multiplied_nums.txt"
    sorted_output_file = tmp_path / "sorted_nums.txt"

    with multiply_input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    multiply_params = Parameters.from_mapping(
        {"input_file": multiply_input_file, "output_file": multiply_output_file, "x": 4}
    )
    sort_params = Parameters.from_mapping(
        {"input_file": multiply_output_file, "output_file": sorted_output_file}
    )
    resources = SlurmResourceRequest.from_parameters(
        slurm_params.unify({"run_on_single_node": sample_include})
    )
    initialize_vista_pegasus_wrapper(params)

    multiply_job_locator = Locator(_parse_parts("jobs/multiply"))
    multiply_artifact = ValueArtifact(
        multiply_output_file,
        depends_on=run_python_on_parameters(
            multiply_job_locator, multiply_by_x_main, multiply_params, depends_on=[]
        ),
        locator=Locator("multiply"),
    )
    sort_job_locator = Locator(_parse_parts("jobs/sort"))
    run_python_on_parameters(
        sort_job_locator,
        sort_nums_main,
        sort_params,
        depends_on=[multiply_artifact],
        resource_request=resources,
    )
    dax_file = write_workflow_description(tmp_path)

    with dax_file.open("r") as dax:
        dax_yaml = load(dax, Loader=SafeLoader)
        root = dax_yaml["jobs"]
        for item in root:
            if item["type"] == "job":
                if "pegasus" in item["profiles"]:
                    if item["name"] == "jobs_multiply":
                        assert (
                            f"--exclude={sample_exclude}"
                            in item["profiles"]["pegasus"]["glite.arguments"]
                        )
                    elif item["name"] == "jobs_sort":
                        assert (
                            "--exclude=" in item["profiles"]["pegasus"]["glite.arguments"]
                        )
                        assert (
                            f"--nodelist={sample_include}"
                            in item["profiles"]["pegasus"]["glite.arguments"]
                        )
                    else:
                        assert False