def from_parameters(params: Parameters) -> KeyValueSink[str, bytes]:
    """
    Create a key-value sink writing to a zip file.

    Right now, this uses all the defaults for `KeyValueSink.zip_bytes_sink`.
    In the future, we might examine other parameters to allow greater customization.
    """
    return KeyValueSink.zip_bytes_sink(params.creatable_file("path"))
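# A minimal usage sketch. The output path "example_output.zip" and the key/value pair
# below are illustrative assumptions, not part of the original script; the sink is
# used as a context manager, the same way the downsampling example below uses
# KeyValueSink.zip_bytes_sink directly.
if __name__ == "__main__":
    demo_params = Parameters.from_mapping({"path": "example_output.zip"})
    with from_parameters(demo_params) as sink:
        sink.put("example_key", b"example value")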
def main(params: Parameters):
    conda_script_generator = CondaJobScriptGenerator.from_parameters(params)
    entry_point = params.string("entry_point")
    work_dir = params.optional_creatable_directory("working_directory") or Path(os.getcwd())
    stdout_file = params.string("log_file") or work_dir / "___stdout.log"
    shell_script = conda_script_generator.generate_shell_script(
        entry_point_name=entry_point,
        param_file=params.existing_file("job_param_file"),
        working_directory=work_dir,
        stdout_file=stdout_file,
    )
    params.creatable_file("conda_script_path").write_text(  # type: ignore
        shell_script, encoding="utf-8"
    )
    if params.boolean("echo_template", default=False):
        print(shell_script)
def main(params: Parameters):
    input_file_path = params.existing_file("input_file")
    output_file_path = params.creatable_file("output_file")
    logging.info("Reading from input file: %s", str(input_file_path.absolute()))
    with input_file_path.open() as input_file:
        nums = [int(x.strip()) for x in input_file if x.strip() != ""]
    nums.sort()
    output_file_path.write_text("\n".join(immutableset([str(x) for x in nums])))
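# A hedged local-invocation sketch. The file names "nums_in.txt" and "nums_out.txt"
# are illustrative assumptions; Parameters.from_mapping is used the same way as in
# the workflow example below, and existing_file/creatable_file coerce the string
# values to paths.
if __name__ == "__main__":
    with open("nums_in.txt", "w", encoding="utf-8") as demo_input:
        demo_input.write("3\n1\n2\n")
    main(
        Parameters.from_mapping(
            {"input_file": "nums_in.txt", "output_file": "nums_out.txt"}
        )
    )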
def main(params: Parameters):
    input_file_path = params.existing_file("input_file")
    output_file_path = params.creatable_file("output_file")
    x = params.integer("x")
    logging.info("Reading from input file: %s", str(input_file_path.absolute()))
    with input_file_path.open() as input_file:
        with output_file_path.open("w") as output_file:
            for num in input_file:
                output_file.write(f"{int(num) * x}\n")
    logging.info("Writing to output file: %s", str(output_file_path.absolute()))
    # Pause so that we can examine the job on the SAGA cluster
    time.sleep(30)
def main(params: Parameters):
    with byte_key_value_source_from_params(params) as input_source:
        keys = list(input_source.keys())
        num_to_sample = min(params.positive_integer(_NUM_TO_SAMPLE_PARAM), len(keys))
        random.shuffle(
            keys,
            random=random.Random(params.integer(_RANDOM_SEED_PARAM, default=0)).random,
        )
        keys_to_keep = keys[:num_to_sample]
        output_zip_path = params.creatable_file("output_zip_path")
        logging.info("Downsampling %s files to %s", num_to_sample, output_zip_path)
        with KeyValueSink.zip_bytes_sink(output_zip_path) as out:
            for key in keys_to_keep:
                out.put(key, input_source[key])
def main(params: Parameters):
    graph_def_file = params.existing_file("graph_def_file")
    checkpoint_glob = params.string("checkpoint_glob")
    vocab_file = params.existing_file("vocab_file")
    sentences_file = params.existing_file("sentences_file")
    output_file = params.creatable_file("output_file")
    do_profiling = params.optional_boolean_with_default("profile", False)

    with tensorflow.contrib.tfprof.ProfileContext(
        os.getcwd(),
        trace_steps=range(2, 10),
        dump_steps=range(1, 10, 2),
        enabled=do_profiling,
    ):
        lm = LM1B.load(
            graph_def_file=graph_def_file,
            checkpoint_file=checkpoint_glob,
            vocab=vocab_file,
        )

        start_time = None
        num_tokens_processed = 0
        with open(sentences_file, 'r', newline='') as inp:
            csv_input = csv.reader(inp, delimiter='\t')
            with open(output_file, 'w', newline='') as out:
                csv_output = csv.writer(out, delimiter='\t')
                for line in csv_input:
                    tokens = line[0].split(' ')
                    output_row = list(line)
                    output_row.insert(0, lm.log_probability_of_sentence(tokens))
                    csv_output.writerow(output_row)
                    # we delay till after the first sentence to avoid counting startup time
                    if num_tokens_processed == 0:
                        start_time = time.time()
                    num_tokens_processed += len(tokens)

    elapsed_time = time.time() - start_time
    print(
        f"Processed {num_tokens_processed} tokens in {elapsed_time} seconds, "
        f"{num_tokens_processed / elapsed_time} tokens per second. "
        f"First sentence not included in time calculation."
    )
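# A hedged sketch of the expected tab-separated input: the script reads the
# space-tokenized sentence from the first column and copies any remaining columns
# through to the output after prepending the sentence log probability. The file name
# and rows below are illustrative, not from the original script.
if __name__ == "__main__":
    with open("example_sentences.tsv", "w", newline="") as demo_out:
        demo_writer = csv.writer(demo_out, delimiter="\t")
        demo_writer.writerow(["the cat sat on the mat", "doc-1"])
        demo_writer.writerow(["language models assign probabilities to sentences", "doc-2"])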
def example_workflow(params: Parameters):  # pragma: no cover
    """
    An example script to generate a container workflow for submission to Pegasus.
    """
    tmp_path = params.creatable_directory("example_root_dir")
    docker_tar = params.creatable_file("docker_tar")
    docker_build_dir = params.existing_directory("docker_build_dir")
    docker_image_name = params.string(
        "docker_image_name", default="pegasus_wrapper_container_demo"
    )
    docker_image_tag = params.string("docker_image_tag", default="0.2")
    mongo_db_tar = params.string(
        "mongo_db_tar", default="/nas/gaia/shared/cluster/docker/mongo-4.4.tar"
    )
    mongo_db_data = "/scratch/dockermount/pegasus_wrapper_tmp/data"
    mongo_db_config = "/scratch/dockermount/pegasus_wrapper_tmp/config"

    # Generating parameters for initializing a workflow
    # We recommend making workflow directory, site, and partition parameters
    # in a research workflow
    workflow_params = Parameters.from_mapping(
        {
            "workflow_name": "Test",
            "workflow_created": "Testing",
            "workflow_log_dir": str(tmp_path / "log"),
            "workflow_directory": str(tmp_path / "working"),
            "site": "saga",
            "namespace": "test",
            "home_dir": str(tmp_path),
            "partition": "scavenge",
        }
    )

    saga31_request = SlurmResourceRequest.from_parameters(
        Parameters.from_mapping({"run_on_single_node": "saga31", "partition": "gaia"})
    )

    workflow_params = workflow_params.unify(params)

    # Our source input for the sample jobs
    input_file = tmp_path / "raw_nums.txt"
    add_y_output_file_nas = tmp_path / "nums_y.txt"
    sorted_output_file_nas = tmp_path / "sorted.txt"

    random = Random()
    random.seed(0)
    nums = [int(random.random() * 100) for _ in range(0, 25)]

    # Base Job Locator
    job_locator = Locator(("jobs",))
    docker_python_root = Path("/home/app/")

    job_profile = PegasusProfile(
        namespace="pegasus", key="transfer.bypass.input.staging", value="True"
    )

    # Write a list of numbers out to be able to run the workflow
    with input_file.open("w") as mult_file:
        mult_file.writelines(f"{num}\n" for num in nums)

    initialize_vista_pegasus_wrapper(workflow_params)

    build_container = run_bash(
        job_locator / "build_docker",
        command=[
            "mkdir -p /scratch/dockermount/pegasus_wrapper_tmp",
            f"cd {docker_build_dir}",
            f"docker build . -t {docker_image_name}:{docker_image_tag}",
            f"docker save -o /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_image_name}:{docker_image_tag}",
            f"cp /scratch/dockermount/pegasus_wrapper_tmp/{docker_tar.name} {docker_tar.absolute()}",
            f"chmod go+r {docker_tar.absolute()}",
            f"docker load --input {mongo_db_tar}",
            f"mkdir -p {mongo_db_data}",
            f"mkdir -p {mongo_db_config}",
        ],
        depends_on=[],
        resource_request=saga31_request,
    )

    python36 = add_container(
        f"{docker_image_name}:{docker_image_tag}",
        "docker",
        str(docker_tar.absolute()),
        image_site="saga",
        bypass_staging=True,
    )
    mongo4_4 = add_container(
        "mongo:4.4", "docker", mongo_db_tar, image_site="saga", bypass_staging=True
    )

    start_mongo = start_docker_as_service(
        mongo4_4,
        depends_on=[build_container],
        mounts=[f"{mongo_db_data}:/data/db", f"{mongo_db_config}:/etc/custom"],
        docker_args="-p 27017:27017",
        resource_request=saga31_request,
    )

    add_y_job = run_python_on_args(
        job_locator / "add",
        docker_python_root / "add_y.py",
        set_args=f"{input_file} {add_y_output_file_nas} --y 10",
        depends_on=[build_container],
        job_profiles=[job_profile],
        resource_request=saga31_request,
        container=python36,
        input_file_paths=[input_file],
        output_file_paths=[add_y_output_file_nas],
    )

    sort_job = run_python_on_parameters(
        job_locator / "sort",
        sort_nums_in_file,
        {"input_file": add_y_output_file_nas, "output_file": sorted_output_file_nas},
        depends_on=[add_y_job],
        container=python36,
        job_profiles=[job_profile],
        resource_request=saga31_request,
        input_file_paths=add_y_output_file_nas,
        output_file_paths=sorted_output_file_nas,
    )

    _ = stop_docker_as_service(
        mongo4_4, depends_on=[start_mongo, sort_job], resource_request=saga31_request
    )

    # Generate the Pegasus DAX file & a Submit Script
    write_workflow_description(tmp_path)
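# A hedged sketch of the parameters example_workflow reads. The three entries below
# are the ones without defaults (example_root_dir, docker_tar, docker_build_dir);
# the concrete paths are illustrative placeholders, and actually running the
# generated workflow also assumes the SAGA cluster setup that
# initialize_vista_pegasus_wrapper and the Docker commands above rely on.
if __name__ == "__main__":
    example_workflow(
        Parameters.from_mapping(
            {
                "example_root_dir": "/nas/gaia/users/example_user/pegasus_demo",
                "docker_tar": "/nas/gaia/users/example_user/pegasus_demo/demo_image.tar",
                "docker_build_dir": "/nas/gaia/users/example_user/docker_build",
            }
        )
    )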