def test_upload_files():
    script_hash = dummy_hash("script_to_run.py")
    upload_mapping, spec = make_spec_from_command(
        ["^script_to_run.py"],
        docker_image="us.gcr.io/bucket/dockerimage",
        dest_url="s3://bucket/dest",
        cas_url="s3://bucket/cas",
        hash_function=dummy_hash,
        is_executable_function=lambda fn: fn.startswith("script"))

    assert upload_mapping == {
        "script_to_run.py": "s3://bucket/cas/" + script_hash
    }

    assert spec == {
        "image": "us.gcr.io/bucket/dockerimage",
        "common": {
            "uploads": [{
                "src_wildcard": "*",
                "dst_url": "s3://bucket/dest"
            }]
        },
        "tasks": [{
            "downloads": [{
                "src_url": "s3://bucket/cas/" + script_hash,
                "dst": "script_to_run.py",
                "executable": True
            }],
            "command": "script_to_run.py"
        }]
    }
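# The tests in this section call a dummy_hash helper that is not shown here.
# A minimal sketch, assuming it only needs to produce a deterministic
# placeholder digest from the filename string (the real helper may hash file
# contents instead):
import hashlib

def dummy_hash(filename):
    # hash the name itself so the tests stay deterministic without touching disk
    return hashlib.md5(filename.encode("utf8")).hexdigest()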
def submit_cmd(jq, io, args, config):
    if args.image:
        image = args.image
    else:
        image = config['default_image']

    job_id = args.name
    if job_id is None:
        job_id = new_job_id()

    cas_url_prefix = config['cas_url_prefix']
    default_url_prefix = config['default_url_prefix']

    if args.file:
        assert len(args.command) == 0
        spec = json.load(open(args.file, "rt"))
    else:
        if args.seq is not None:
            parameters = [{"i": str(i)} for i in range(args.seq)]
        elif args.params is not None:
            parameters = read_parameters_from_csv(args.params)
        else:
            parameters = [{}]

        assert len(args.command) != 0

        hash_db = CachingHashFunction(
            config.get("cache_db_path", ".kubeque-cached-file-hashes"))
        upload_map, spec = make_spec_from_command(
            args.command,
            image,
            dest_url=default_url_prefix + job_id,
            cas_url=cas_url_prefix,
            parameters=parameters,
            resources=args.resources,
            hash_function=hash_db.hash_filename,
            extra_files=expand_files_to_upload(args.push))
        hash_db.persist()

        log.info("upload_map = %s", upload_map)
        for filename, dest in upload_map.items():
            io.put(filename, dest, skip_if_exists=True)

    log.debug("spec: %s", json.dumps(spec, indent=2))
    submit(jq, io, job_id, spec, args.dryrun, config, args.skip_kube_submit)

    if not (args.dryrun or args.skip_kube_submit) and args.wait_for_completion:
        log.info("Waiting for job to terminate")
        watch(jq, job_id)
        if args.fetch:
            log.info("Job completed, downloading results to %s", args.fetch)
            fetch_cmd_(jq, io, job_id, args.fetch)
        else:
            log.info(
                "Job completed. You can download results by executing: kubeque fetch %s DEST_DIR",
                job_id)
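# read_parameters_from_csv is called above but not defined in this section.
# A minimal sketch, assuming each CSV row becomes one parameter dict keyed by
# the column headers (the real helper may do additional validation):
import csv

def read_parameters_from_csv(path):
    with open(path, "rt") as fd:
        return [dict(row) for row in csv.DictReader(fd)]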
def test_parameterized():
    script1_hash = dummy_hash("script1")
    script2_hash = dummy_hash("script2")
    upload_mapping, spec = make_spec_from_command(
        ["python", "^{script_name}", "{parameter}"],
        docker_image="us.gcr.io/bucket/dockerimage",
        dest_url="s3://bucket/dest",
        cas_url="s3://bucket/cas",
        parameters=[
            dict(script_name="script1", parameter="a"),
            dict(script_name="script2", parameter="b")
        ],
        hash_function=dummy_hash,
        is_executable_function=lambda fn: fn.startswith("script"))

    assert upload_mapping == {
        "script1": "s3://bucket/cas/" + script1_hash,
        "script2": "s3://bucket/cas/" + script2_hash
    }

    assert spec == {
        "image": "us.gcr.io/bucket/dockerimage",
        "common": {
            "uploads": [{
                "src_wildcard": "*",
                "dst_url": "s3://bucket/dest"
            }]
        },
        "tasks": [{
            "downloads": [{
                "src_url": "s3://bucket/cas/" + script1_hash,
                "dst": "script1",
                "executable": True
            }],
            "command": "python script1 a"
        }, {
            "downloads": [{
                "src_url": "s3://bucket/cas/" + script2_hash,
                "dst": "script2",
                "executable": True
            }],
            "command": "python script2 b"
        }]
    }
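# An illustration of the substitution test_parameterized exercises: each
# "{name}" placeholder is filled from a parameter dict, and a leading "^"
# marks the argument as a file to upload into the CAS. This is a hypothetical
# sketch of the idea, not the actual make_spec_from_command implementation.
def expand_command_template(command, params):
    expanded = []
    for arg in command:
        needs_upload = arg.startswith("^")
        if needs_upload:
            arg = arg[1:]
        expanded.append((arg.format(**params), needs_upload))
    return expanded

# expand_command_template(["python", "^{script_name}", "{parameter}"],
#                         dict(script_name="script1", parameter="a"))
# -> [("python", False), ("script1", True), ("a", False)]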
def test_simple_command():
    upload_mapping, spec = make_spec_from_command(
        ["bash", "-c", "date"],
        docker_image="us.gcr.io/bucket/dockerimage",
        dest_url="s3://bucket/dest")

    assert upload_mapping == {}

    assert spec == {
        "image": "us.gcr.io/bucket/dockerimage",
        "common": {
            "uploads": [{
                "src_wildcard": "*",
                "dst_url": "s3://bucket/dest"
            }]
        },
        "tasks": [{
            "command": "bash -c date",
            "downloads": []
        }]
    }
def submit_cmd(jq, io, cluster, args, config):
    metadata = {}

    if args.image:
        image = args.image
    else:
        image = config['default_image']

    job_id = args.name
    if job_id is None:
        job_id = new_job_id()

    cas_url_prefix = config['cas_url_prefix']
    default_url_prefix = config['default_url_prefix']

    if args.file:
        assert len(args.command) == 0
        spec = json.load(open(args.file, "rt"))
    else:
        if args.seq is not None:
            parameters = [{"i": str(i)} for i in range(args.seq)]
        elif args.params is not None:
            parameters = read_parameters_from_csv(args.params)
        else:
            parameters = [{}]

        assert len(args.command) != 0

        resource_spec = _parse_resources(args.resources)

        hash_db = CachingHashFunction(
            config.get("cache_db_path", ".kubeque-cached-file-hashes"))
        upload_map, spec = make_spec_from_command(
            args.command,
            image,
            dest_url=default_url_prefix + job_id,
            cas_url=cas_url_prefix,
            parameters=parameters,
            resource_spec=resource_spec,
            hash_function=hash_db.hash_filename,
            src_wildcards=args.results_wildcards,
            extra_files=expand_files_to_upload(args.push),
            working_dir=args.working_dir)

        kubequeconsume_exe_path = config['kubequeconsume_exe_path']
        kubequeconsume_exe_url = add_file_to_upload_map(
            upload_map, hash_db.hash_filename, cas_url_prefix,
            kubequeconsume_exe_path, "!KUBEQUECONSUME")

        hash_db.persist()

        log.debug("upload_map = %s", upload_map)
        for filename, dest in upload_map.items():
            io.put(filename, dest, skip_if_exists=True)

    log.debug("spec: %s", json.dumps(spec, indent=2))
    submit(jq, io, cluster, job_id, spec, args.dryrun, config, args.skip_kube_submit,
           metadata, kubequeconsume_exe_url, args.local)

    finished = False
    if args.local:
        # if we ran it within docker, and the docker command completed, then the job is done
        finished = True
    else:
        if not (args.dryrun or args.skip_kube_submit) and args.wait_for_completion:
            log.info("Waiting for job to terminate")
            watch(jq, job_id, cluster)
            finished = True

    if finished:
        if args.fetch:
            log.info(
                "Done waiting for job to complete, downloading results to %s",
                args.fetch)
            fetch_cmd_(jq, io, job_id, args.fetch)
        else:
            log.info("Done waiting for job to complete, results written to %s",
                     default_url_prefix + "/" + job_id)
            log.info(
                "You can download results via 'gsutil rsync -r %s DEST_DIR'",
                default_url_prefix + "/" + job_id)
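# new_job_id is called by both submit_cmd variants but not defined in this
# section. A minimal sketch, assuming any unique, URL-safe identifier is
# acceptable (the real helper may encode a timestamp or user prefix):
import uuid

def new_job_id():
    return "job-" + uuid.uuid4().hex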