Example #1
def test_upload_files():
    script_hash = dummy_hash("script_to_run.py")

    upload_mapping, spec = make_spec_from_command(
        ["^script_to_run.py"],
        docker_image="us.gcr.io/bucket/dockerimage",
        dest_url="s3://bucket/dest",
        cas_url="s3://bucket/cas",
        hash_function=dummy_hash,
        is_executable_function=lambda fn: fn.startswith("script"))
    assert upload_mapping == {
        "script_to_run.py": "s3://bucket/cas/" + script_hash
    }
    assert spec == {
        "image": "us.gcr.io/bucket/dockerimage",
        "common": {
            "uploads": [{
                "src_wildcard": "*",
                "dst_url": "s3://bucket/dest"
            }]
        },
        "tasks": [{
            "downloads": [{
                "src_url": "s3://bucket/cas/" + script_hash,
                "dst": "script_to_run.py",
                "executable": True
            }],
            "command": "script_to_run.py"
        }]
    }
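
The test above relies on a dummy_hash helper that is not shown here. Any deterministic string-to-digest function works, since the assertions only compare dummy_hash(name) against the same call used to build the expected CAS URL. A minimal sketch, assuming an MD5 digest of the name itself (the real helper may hash file contents instead):

import hashlib

def dummy_hash(name):
    # Hypothetical stand-in: a deterministic digest of the name, so the
    # expected "s3://bucket/cas/<hash>" URLs can be precomputed in the test.
    return hashlib.md5(name.encode("utf8")).hexdigest()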
Example #2
def submit_cmd(jq, io, args, config):
    if args.image:
        image = args.image
    else:
        image = config['default_image']

    job_id = args.name
    if job_id is None:
        job_id = new_job_id()

    cas_url_prefix = config['cas_url_prefix']
    default_url_prefix = config['default_url_prefix']

    if args.file:
        # a complete spec was provided; no command may be given on the command line
        assert len(args.command) == 0
        with open(args.file, "rt") as fd:
            spec = json.load(fd)
    else:
        # build one parameter dict per task: a numeric sequence, rows from a
        # CSV file, or a single unparameterized task
        if args.seq is not None:
            parameters = [{"i": str(i)} for i in range(args.seq)]
        elif args.params is not None:
            parameters = read_parameters_from_csv(args.params)
        else:
            parameters = [{}]

        assert len(args.command) != 0

        hash_db = CachingHashFunction(
            config.get("cache_db_path", ".kubeque-cached-file-hashes"))
        upload_map, spec = make_spec_from_command(
            args.command,
            image,
            dest_url=default_url_prefix + job_id,
            cas_url=cas_url_prefix,
            parameters=parameters,
            resources=args.resources,
            hash_function=hash_db.hash_filename,
            extra_files=expand_files_to_upload(args.push))
        hash_db.persist()

        log.info("upload_map = %s", upload_map)
        for filename, dest in upload_map.items():
            io.put(filename, dest, skip_if_exists=True)

    log.debug("spec: %s", json.dumps(spec, indent=2))
    submit(jq, io, job_id, spec, args.dryrun, config, args.skip_kube_submit)

    if not (args.dryrun or args.skip_kube_submit) and args.wait_for_completion:
        log.info("Waiting for job to terminate")
        watch(jq, job_id)
        if args.fetch:
            log.info("Job completed, downloading results to %s", args.fetch)
            fetch_cmd_(jq, io, job_id, args.fetch)
        else:
            log.info(
                "Job completed.  You can download results by executing: kubeque fetch %s DEST_DIR",
                job_id)
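
read_parameters_from_csv is referenced above but not defined in this example. A plausible sketch, assuming each CSV row becomes one parameter dict (column headers as keys, values as strings), which is the shape make_spec_from_command expects for parameters:

import csv

def read_parameters_from_csv(path):
    # Hypothetical sketch: one task per row, with header names serving as the
    # substitution keys used in "{name}" placeholders of the command template.
    with open(path, "rt", newline="") as fd:
        return [dict(row) for row in csv.DictReader(fd)]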
Example #3
def test_parameterized():
    script1_hash = dummy_hash("script1")
    script2_hash = dummy_hash("script2")

    upload_mapping, spec = make_spec_from_command(
        ["python", "^{script_name}", "{parameter}"],
        docker_image="us.gcr.io/bucket/dockerimage",
        dest_url="s3://bucket/dest",
        cas_url="s3://bucket/cas",
        parameters=[
            dict(script_name="script1", parameter="a"),
            dict(script_name="script2", parameter="b")
        ],
        hash_function=dummy_hash,
        is_executable_function=lambda fn: fn.startswith("script"))

    assert upload_mapping == {
        "script1": "s3://bucket/cas/" + script1_hash,
        "script2": "s3://bucket/cas/" + script2_hash
    }
    assert spec == {
        "image": "us.gcr.io/bucket/dockerimage",
        "common": {
            "uploads": [{
                "src_wildcard": "*",
                "dst_url": "s3://bucket/dest"
            }]
        },
        "tasks": [{
            "downloads": [{
                "src_url": "s3://bucket/cas/" + script1_hash,
                "dst": "script1",
                "executable": True
            }],
            "command": "python script1 a"
        }, {
            "downloads": [{
                "src_url": "s3://bucket/cas/" + script2_hash,
                "dst": "script2",
                "executable": True
            }],
            "command": "python script2 b"
        }]
    }
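
The assertions document the template rules: "{name}" placeholders are filled from each parameter dict, and a leading "^" marks a token as a local file to be content-addressed, uploaded to the CAS URL, and downloaded into the task. A small illustration of that per-task expansion, written against dummy_hash from Example #1; it mirrors what the assertions expect, not the library's actual internals:

def expand_task(command_template, params, cas_url, hash_function,
                is_executable_function):
    # Illustrative only: reproduce one task entry as the test expects it.
    downloads = []
    resolved = []
    for token in command_template:
        token = token.format(**params)
        if token.startswith("^"):
            filename = token[1:]
            downloads.append({
                "src_url": cas_url + "/" + hash_function(filename),
                "dst": filename,
                "executable": is_executable_function(filename)
            })
            token = filename
        resolved.append(token)
    return {"downloads": downloads, "command": " ".join(resolved)}

Calling expand_task(["python", "^{script_name}", "{parameter}"], dict(script_name="script1", parameter="a"), "s3://bucket/cas", dummy_hash, lambda fn: fn.startswith("script")) reproduces the first entry of the "tasks" list above.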
Example #4
def test_simple_command():
    upload_mapping, spec = make_spec_from_command(
        ["bash", "-c", "date"],
        docker_image="us.gcr.io/bucket/dockerimage",
        dest_url="s3://bucket/dest")
    assert upload_mapping == {}
    assert spec == {
        "image": "us.gcr.io/bucket/dockerimage",
        "common": {
            "uploads": [{
                "src_wildcard": "*",
                "dst_url": "s3://bucket/dest"
            }]
        },
        "tasks": [{
            "command": "bash -c date",
            "downloads": []
        }]
    }
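
When there is nothing to upload, the returned upload_mapping is empty and the spec is plain JSON-serializable data, as the assertion shows. A minimal driver, assuming make_spec_from_command is in scope as in the tests above, writes the spec out; such a file could then be passed back through the --file branch of submit_cmd shown earlier:

import json

upload_mapping, spec = make_spec_from_command(
    ["bash", "-c", "date"],
    docker_image="us.gcr.io/bucket/dockerimage",
    dest_url="s3://bucket/dest")

# Dump the generated spec for inspection or later submission via --file.
with open("spec.json", "wt") as fd:
    json.dump(spec, fd, indent=2)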
Example #5
def submit_cmd(jq, io, cluster, args, config):
    metadata = {}

    if args.image:
        image = args.image
    else:
        image = config['default_image']

    job_id = args.name
    if job_id is None:
        job_id = new_job_id()

    cas_url_prefix = config['cas_url_prefix']
    default_url_prefix = config['default_url_prefix']

    if args.file:
        assert len(args.command) == 0
        with open(args.file, "rt") as fd:
            spec = json.load(fd)
    else:
        if args.seq is not None:
            parameters = [{"i": str(i)} for i in range(args.seq)]
        elif args.params is not None:
            parameters = read_parameters_from_csv(args.params)
        else:
            parameters = [{}]

        assert len(args.command) != 0

        resource_spec = _parse_resources(args.resources)

        hash_db = CachingHashFunction(
            config.get("cache_db_path", ".kubeque-cached-file-hashes"))
        upload_map, spec = make_spec_from_command(
            args.command,
            image,
            dest_url=default_url_prefix + job_id,
            cas_url=cas_url_prefix,
            parameters=parameters,
            resource_spec=resource_spec,
            hash_function=hash_db.hash_filename,
            src_wildcards=args.results_wildcards,
            extra_files=expand_files_to_upload(args.push),
            working_dir=args.working_dir)

        # the kubequeconsume executable is pushed through the same
        # content-addressed store as the job's input files
        kubequeconsume_exe_path = config['kubequeconsume_exe_path']
        kubequeconsume_exe_url = add_file_to_upload_map(
            upload_map, hash_db.hash_filename, cas_url_prefix,
            kubequeconsume_exe_path, "!KUBEQUECONSUME")

        hash_db.persist()

        log.debug("upload_map = %s", upload_map)
        for filename, dest in upload_map.items():
            io.put(filename, dest, skip_if_exists=True)

    log.debug("spec: %s", json.dumps(spec, indent=2))
    submit(jq, io, cluster, job_id, spec, args.dryrun, config,
           args.skip_kube_submit, metadata, kubequeconsume_exe_url, args.local)

    finished = False
    if args.local:
        # if we ran it within docker, and the docker command completed, then the job is done
        finished = True
    else:
        if not (args.dryrun
                or args.skip_kube_submit) and args.wait_for_completion:
            log.info("Waiting for job to terminate")
            watch(jq, job_id, cluster)
            finished = True

    if finished:
        if args.fetch:
            log.info(
                "Done waiting for job to complete, downloading results to %s",
                args.fetch)
            fetch_cmd_(jq, io, job_id, args.fetch)
        else:
            log.info("Done waiting for job to complete, results written to %s",
                     default_url_prefix + "/" + job_id)
            log.info(
                "You can download results via 'gsutil rsync -r %s DEST_DIR'",
                default_url_prefix + "/" + job_id)
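
Both submit_cmd variants pass expand_files_to_upload(args.push) as extra_files. The helper is not shown; a hedged sketch, assuming --push takes filenames or wildcards that are simply flattened into a list (the real implementation may also handle directories or name mappings):

import glob

def expand_files_to_upload(push_args):
    # Hypothetical sketch: expand each --push argument as a wildcard and
    # return the flat list of extra files for make_spec_from_command to
    # add to the upload map.
    filenames = []
    for pattern in push_args or []:
        matches = glob.glob(pattern)
        filenames.extend(matches if matches else [pattern])
    return filenames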