Exemplo n.º 1
0
    def test_evalinput(self):
        """Evaluate 'inputs/' queries against the example descriptor pair."""
        self.set_examples()

        # Full listing of every input value.
        query = bosh.evaluate(self.desc, self.invo, "inputs/")
        expect = {
            'str_input_list': ["fo '; echo FAIL", 'bar'],
            'str_input': 'coin;plop',
            'config_num': 4,
            'num_input': None,
            'file_input': './setup.py',
            'enum_input': 'val1',
            'list_int_input': [1, 2, 3],
            'flag_input': None
        }
        # assertEqual gives informative diffs and matches the sibling tests.
        self.assertEqual(query, expect)

        # Multiple queries return one result dict per query, in order.
        query = bosh.evaluate(self.desc, self.invo,
                              "inputs/type=Flag,id=flag_input",
                              "inputs/type=Number")
        expect = [{
            'flag_input': None
        }, {
            'config_num': 4,
            'num_input': None,
            'list_int_input': [1, 2, 3]
        }]
        self.assertEqual(query, expect)

        # A non-matching id yields an empty result rather than an error.
        query = bosh.evaluate(self.desc, self.invo, "inputs/id=strinputs")
        expect = {}
        self.assertEqual(query, expect)

        # So does an unrecognized query root/filter.
        query = bosh.evaluate(self.desc, self.invo,
                              "inputt/nonsense=strinputs")
        expect = {}
        self.assertEqual(query, expect)
Exemplo n.º 2
0
    def test_evalgroups(self):
        """Query group membership via bosh.evaluate on the example pair."""
        self.set_examples()
        expected = {'an_example_group': {'num_input': None,
                                         'enum_input': 'val1'}}

        # All groups.
        result = bosh.evaluate(self.desc, self.invo, "groups/")
        self.assertEqual(result, expected)

        # Filtered to mutually-exclusive groups; the same group matches.
        result = bosh.evaluate(self.desc,
                               self.invo,
                               "groups/mutually-exclusive=True")
        self.assertEqual(result, expected)
Exemplo n.º 3
0
    def test_evaloutput_from_zenodo(self, mock_get):
        """Evaluate 'output-files/' queries on a descriptor fetched from Zenodo."""
        self.set_examples_from_zenodo()

        # Full output listing.
        all_outputs = {'logfile': 'log-4-coin;plop.txt',
                       'output_files': 'output/*_exampleOutputTag.resultType',
                       'config_file': './config.txt'}
        self.assertEqual(
            bosh.evaluate(self.desc, self.invo, "output-files/"),
            all_outputs)

        # Filter on a valid output id.
        self.assertEqual(
            bosh.evaluate(self.desc, self.invo, "output-files/id=logfile"),
            {'logfile': 'log-4-coin;plop.txt'})

        # An unknown id yields an empty result rather than an error.
        self.assertEqual(
            bosh.evaluate(self.desc, self.invo, "output-files/id=log-file"),
            {})
Exemplo n.º 4
0
    def test_evaloutput_json_obj(self):
        """Evaluate 'output-files/' queries when examples come from JSON objects."""
        self.set_examples_json_obj()

        cases = [
            ("output-files/",
             {'logfile': 'log-4-coin;plop.txt',
              'output_files': 'output/*_exampleOutputTag.resultType',
              'config_file': './subdir1/subdir2/config.txt'}),
            ("output-files/id=logfile",
             {'logfile': 'log-4-coin;plop.txt'}),
            # Unknown ids evaluate to an empty mapping.
            ("output-files/id=log-file", {}),
        ]
        for query_str, expected in cases:
            self.assertEqual(
                bosh.evaluate(self.desc, self.invo, query_str), expected)
Exemplo n.º 5
0
    def test_evaloutput(self):
        """Evaluate 'output-files/' queries against the example pair."""
        self.set_examples()
        query = bosh.evaluate(self.desc, self.invo, "output-files/")
        expect = {'logfile': 'log-4-coin;plop.txt',
                  'output_files': 'output/*_exampleOutputTag.resultType',
                  'config_file': './config.txt'}
        # assertEqual gives informative diffs and matches the sibling tests.
        self.assertEqual(query, expect)

        # Filter on a valid output id.
        query = bosh.evaluate(self.desc, self.invo, "output-files/id=logfile")
        expect = {'logfile': 'log-4-coin;plop.txt'}
        self.assertEqual(query, expect)

        # An unknown output id evaluates to an empty mapping.
        query = bosh.evaluate(self.desc, self.invo, "output-files/id=log-file")
        expect = {}
        self.assertEqual(query, expect)
Exemplo n.º 6
0
 def test_evaloutput(self):
     """An invalid query string should emit an '[ ERROR ]' message to stdout."""
     example1_dir = os.path.join(self.get_examples_dir(), "example1")
     descriptor = os.path.join(example1_dir, "example1_docker.json")
     invocation = os.path.join(example1_dir, "invocation.json")
     query = bosh.evaluate(descriptor, invocation, "invalid-query")
     captured_out, _ = self.capfd.readouterr()
     self.assertIn("[ ERROR ]", captured_out)
Exemplo n.º 7
0
def test(descriptor, test, invocation):
    """Launch *descriptor* with *invocation* and check the test's assertions.

    ``test["assertions"]`` may contain an "exit-code" and/or a list of
    "output-files" entries (each with an "id" and, optionally, an
    "md5-reference").
    """
    # Run pipeline.
    ret = bosh.execute("launch",
                       descriptor,
                       invocation.name,
                       "--skip-data-collection")
    print(ret)

    # Choose appropriate assertion scenario
    assertions = test["assertions"]

    if "exit-code" in assertions:
        assert ret.exit_code == assertions["exit-code"]

    if "output-files" in assertions:
        # Map each output id to the file path bosh resolved for it.
        resolved = bosh.evaluate(descriptor, invocation.name, "output-files/")

        for expected in assertions["output-files"]:
            path = resolved[expected["id"]]
            assert op.exists(path)

            # Optionally, an md5 reference may have been specified.
            if "md5-reference" in expected:
                assert compute_md5(path) == expected["md5-reference"]
Exemplo n.º 8
0
def test(descriptor, test, invocation, paramsDict):
    """Launch the pipeline with extra CLI params and verify test assertions."""
    cmd = ["launch", descriptor, invocation.name]

    # Append any additional params; a None value marks a bare flag.
    for flag, value in paramsDict.items():
        cmd.append(flag)
        if value is not None:
            cmd.append(value)

    print(cmd)
    # Run pipeline.
    ret = bosh.execute(*cmd)
    print(ret)

    # Choose appropriate assertion scenario
    assertions = test["assertions"]
    if "exit-code" in assertions:
        assert ret.exit_code == assertions["exit-code"]

    if "output-files" in assertions:
        # Hash map of output ids to resolved output file paths.
        id_to_path = bosh.evaluate(descriptor, invocation.name,
                                   "output-files/")

        for spec in assertions["output-files"]:
            file_path = id_to_path[spec["id"]]
            assert op.exists(file_path)

            # Optionally, an md5 reference may have been specified.
            if "md5-reference" in spec:
                assert compute_md5(file_path) == spec["md5-reference"]
Exemplo n.º 9
0
    def generate_tests(self):
        """Create DescriptorTest and DescriptorTestAssertion entries for
        every test declared in this descriptor's JSON.

        Each test's invocation is evaluated with bosh; an invalid
        invocation is recorded on the entry itself rather than aborting
        the evaluation of the whole descriptor.
        """
        # Get descriptor as JSON.
        desc_json = json.loads(self.db_desc.data_file.read())

        # Create new test entries.
        test_list = []
        for test_json in desc_json['tests']:
            test = DescriptorTest()

            test.test_name = test_json['name']
            test.descriptor = self.db_desc
            test_list.append(test_json['name'])

            # bosh reads the invocation from a file, so the test's inline
            # invocation is written to a temporary file first.
            invocation_tmp_file = create_temporary_file(
                json.dumps(test_json['invocation']).encode())

            # An erroneous invocation is noted on the entry; it must not
            # halt the evaluation of the remaining tests.
            erroneous_invocation = False
            try:
                bosh.invocation(self.db_desc.data_file.file.name, '-i',
                                invocation_tmp_file.name)
            except Exception:
                # Narrowed from a bare 'except:' so system-exiting
                # exceptions (KeyboardInterrupt, SystemExit) still propagate.
                erroneous_invocation = True

            # Rewind so the file can be read again below.
            invocation_tmp_file.seek(0)

            if erroneous_invocation:
                test.evaluated_invocation = "Error: invalid invocation"
            else:
                test.evaluated_invocation = get_bosh_cmdline(
                    self.db_desc.data_file.file.name, invocation_tmp_file.name)

            test.save()
            invocation_tmp_file.close()

            # Exit-code assertion entry.
            if test_json['assertions'].get('exit-code') is not None:
                assertion = DescriptorTestAssertion()
                assertion.test = test
                assertion.operand1 = test_json['assertions']['exit-code']
                assertion.type = ASSERTION_EXITCODE
                assertion.save()

            output_files = None
            if not erroneous_invocation:
                output_files = bosh.evaluate(self.db_desc.data_file.file.name,
                                             invocation_tmp_file.name,
                                             "output-files/")

            if test_json['assertions'].get('output-files') is not None:

                for output_assertion_json in test_json['assertions'][
                        'output-files']:

                    assertion = DescriptorTestAssertion()
                    assertion.test = test

                    # Id processing (named output_id: 'id' is a builtin).
                    output_id = output_assertion_json['id']
                    if erroneous_invocation:
                        # Skip evaluation: the associated invocation is invalid.
                        assertion.operand1 = "Cannot evaluate: invocation invalid"
                    else:
                        assertion.operand1 = output_files[output_id]

                    # MD5 reference processing.
                    if output_assertion_json.get('md5-reference') is not None:
                        assertion.operand2 = output_assertion_json[
                            'md5-reference']
                        assertion.type = ASSERTION_OUTPUT_FILE_MATCHES_MD5
                    else:
                        assertion.type = ASSERTION_OUTPUT_FILE_EXISTS

                    assertion.save()

            # We are done filling up the test entry.

        # Rewind the descriptor file for subsequent readers.
        self.db_desc.data_file.seek(0)
Exemplo n.º 10
0
def processTask(metadata, clowdrloc=None, verbose=False, **kwargs):
    """Fetch, execute, and record a single Clowdr task.

    Downloads the task metadata, descriptor, and invocation (and input
    data when running remotely), launches the tool with bosh, then posts
    stdout, stderr, and a JSON summary back to the remote task directory.

    Parameters
    ----------
    metadata : str
        Path/URL of the task metadata JSON file.
    clowdrloc : str, optional
        Base local working directory (defaults to "/clowtask/").
    verbose : bool, optional
        Print progress messages when True.
    **kwargs
        Recognized keys: "local" (skip remote transfers), "workdir"
        (cwd for local runs), "volumes" (container volume mappings).
    """
    # Resolve a unique local working directory for this task.
    if clowdrloc is None:
        localtaskdir = "/clowtask/"
    else:
        localtaskdir = clowdrloc

    localtaskdir = op.join(localtaskdir, "clowtask_" + utils.randstring(3))
    if not op.exists(localtaskdir):
        os.makedirs(localtaskdir)

    if verbose:
        print("Fetching metadata...")
    remotetaskdir = op.dirname(metadata)
    metadata = utils.get(metadata, localtaskdir)[0]
    # Grabs an ID from a filename of the form: fname-#.ext
    task_id = metadata.split('.')[0].split('-')[-1]

    # Parse metadata; the context manager avoids leaking the file handle.
    with open(metadata) as fhandle:
        metadata = json.load(fhandle)
    descriptor = metadata['tool']
    invocation = metadata['invocation']
    input_data = metadata['dataloc']
    output_loc = utils.truepath(metadata['taskloc'])

    if verbose:
        print("Fetching descriptor and invocation...")
    # Get descriptor and invocation
    desc_local = utils.get(descriptor, localtaskdir)[0]
    invo_local = utils.get(invocation, localtaskdir)[0]

    # Get input data, if running remotely
    if not kwargs.get("local") and \
       any(dl.startswith("s3://") for dl in input_data):
        if verbose:
            print("Fetching input data...")
        localdatadir = op.join(localtaskdir, "data")
        for dataloc in input_data:
            utils.get(dataloc, localdatadir)
        # Move to correct location
        os.chdir(localdatadir)
    else:
        if verbose:
            print("Skipping data fetch (local execution)...")
        if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
            os.chdir(kwargs["workdir"])

    if verbose:
        print("Beginning execution...")
    # Launch task
    start_time = time.time()
    if kwargs.get("volumes"):
        volumes = " ".join(kwargs.get("volumes"))
        bosh_output = bosh.execute('launch', desc_local, invo_local, '-v',
                                   volumes)
    else:
        bosh_output = bosh.execute('launch', desc_local, invo_local)
    # Bug fix: the result was previously printed only on the no-volumes
    # path; report it for both launch variants.
    if verbose:
        print(bosh_output)
    duration = time.time() - start_time

    # Resolve expected outputs and keep only those that actually exist.
    # (A prior re-read of the descriptor JSON here was dead code: its
    # result was immediately overwritten by this evaluation.)
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    outputs_present = [f for f in outputs_all.values() if op.exists(f)]

    # Write stdout to file
    stdoutf = "stdout-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stdoutf), "w") as fhandle:
        fhandle.write(bosh_output.stdout)
    utils.post(op.join(localtaskdir, stdoutf), remotetaskdir)

    # Write stderr to file
    stderrf = "stderr-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stderrf), "w") as fhandle:
        fhandle.write(bosh_output.stderr)
    utils.post(op.join(localtaskdir, stderrf), remotetaskdir)

    # Summarize execution: timing, exit status, and artifact locations.
    summary = {
        "duration": duration,
        "exitcode": bosh_output.exit_code,
        "outputs": [],
        "stdout": op.join(remotetaskdir, stdoutf),
        "stderr": op.join(remotetaskdir, stderrf)
    }

    if not kwargs.get("local"):
        if verbose:
            print("Uploading outputs...")
        # Push outputs
        for local_output in outputs_present:
            if verbose:
                print("{} --> {}".format(local_output, output_loc))
            summary["outputs"] += utils.post(local_output, output_loc)
    else:
        if verbose:
            print("Skipping uploading outputs (local execution)...")
        summary["outputs"] = outputs_present

    summarf = "summary-{}.json".format(task_id)
    with open(op.join(localtaskdir, summarf), "w") as fhandle:
        fhandle.write(json.dumps(summary) + "\n")
    utils.post(op.join(localtaskdir, summarf), remotetaskdir)
Exemplo n.º 11
0
 def test_example3_filepathrenaming(self):
     """Output file path templates are filled from the invocation's file value."""
     # assertEquals is a deprecated alias (removed in Python 3.12);
     # use assertEqual.
     self.assertEqual(
         bosh.evaluate(self.get_file_path("example3.json"),
                       self.get_file_path("invocation.json"),
                       "output-files/"), {'logfile': 'log-FileValue.txt'})
Exemplo n.º 12
0
def process_task(metadata):
    """Fetch a task's metadata and inputs, run it with bosh, and upload
    any outputs that were produced.

    Parameters
    ----------
    metadata : str
        Location of the task metadata JSON, containing 'descriptor',
        'invocation', 'input_data', and 'output_loc' keys.
    """
    # Get metadata
    local_task_dir = "/task/"
    print("Fetching metadata...")
    metadata = get(metadata, local_task_dir)[0]

    # Parse metadata; the context manager avoids leaking the file handle.
    with open(metadata) as fhandle:
        metadata = json.load(fhandle)
    descriptor = metadata['descriptor']
    invocation = metadata['invocation']
    input_data = metadata['input_data']
    output_loc = metadata['output_loc']

    print("Fetching descriptor and invocation...")
    # Get descriptor and invocation
    desc_local = get(descriptor, local_task_dir)[0]
    invo_local = get(invocation, local_task_dir)[0]

    print("Fetching input data...")
    # Get input data
    local_data_dir = "/clowdata/"
    for dataloc in input_data:
        get(dataloc, local_data_dir)

    # Move to correct location
    os.chdir(local_data_dir)

    print("Beginning execution...")
    # Launch task. The ImportError handler remains from a now-removed
    # reprozip provenance-tracing path; with tracing disabled, the try
    # branch is the normal execution path.
    try:
        std = bosh.execute('launch', desc_local, invo_local)
    except ImportError:
        print("(Reprozip not installed, no provenance tracing)")
        std = bosh.execute('launch', desc_local, invo_local)

    # Resolve expected outputs and keep only those that actually exist.
    # (A prior re-read of the descriptor JSON here was dead code: its
    # result was immediately overwritten by this evaluation. The unused
    # task_loc/invo_id locals served only the removed tracing code.)
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    outputs_present = [f for f in outputs_all.values() if op.exists(f)]

    print("Uploading outputs...")
    # Push outputs
    for local_output in outputs_present:
        print("{} --> {}".format(local_output, output_loc))
        post(local_output, output_loc)
Exemplo n.º 13
0
    def manageTask(self, taskfile, provdir=None, verbose=False, **kwargs):
        """Fetch, execute, monitor, and record a single Clowdr task.

        Downloads the task metadata/descriptor/invocation (and input data
        when running remotely), launches the tool via ``self.provLaunch``,
        then posts usage stats, stdout, stderr, and a JSON summary back to
        the remote task directory. Remote runs clean up all local files.

        Parameters
        ----------
        taskfile : str
            Path/URL of the task JSON file (expected form: fname-#.ext).
        provdir : str, optional
            Base local working directory (defaults to "/clowtask/").
        verbose : bool, optional
            Print progress messages when True.
        **kwargs
            Recognized keys: "local", "workdir", "volumes", "user"; the
            rest are forwarded to self.provLaunch.
        """
        # Get metadata
        if provdir is None:
            self.localtaskdir = "/clowtask/"
        else:
            self.localtaskdir = provdir

        # The below grabs an ID from the form: /some/path/to/fname-#.ext
        self.task_id = taskfile.split('.')[0].split('-')[-1]

        self.localtaskdir = op.join(self.localtaskdir,
                                    "clowtask_" + self.task_id)
        if not op.exists(self.localtaskdir):
            os.makedirs(self.localtaskdir)

        if verbose:
            print("Fetching metadata...", flush=True)
        remotetaskdir = op.dirname(taskfile)
        taskfile = utils.get(taskfile, self.localtaskdir)[0]

        # Parse metadata; the context manager avoids leaking the handle.
        with open(taskfile) as fhandle:
            taskinfo = json.load(fhandle)
        descriptor = taskinfo['tool']
        invocation = taskinfo['invocation']
        input_data = taskinfo['dataloc']
        output_loc = utils.truepath(taskinfo['taskloc'])

        if verbose:
            print("Fetching descriptor and invocation...", flush=True)
        # Get descriptor and invocation
        desc_local = utils.get(descriptor, self.localtaskdir)[0]
        invo_local = utils.get(invocation, self.localtaskdir)[0]

        # Bug fix: initialize here so the cleanup loop at the end cannot
        # raise NameError on a remote run that fetched no s3 inputs.
        local_input_data = []

        # Get input data, if running remotely
        if not kwargs.get("local") and \
           any(dl.startswith("s3://") for dl in input_data):
            if verbose:
                print("Fetching input data...", flush=True)
            localdatadir = op.join("/data")
            for dataloc in input_data:
                local_input_data += utils.get(dataloc, localdatadir)
            # Move to correct location
            os.chdir(localdatadir)
        else:
            if verbose:
                print("Skipping data fetch (local execution)...", flush=True)
            if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
                os.chdir(kwargs["workdir"])

        if verbose:
            print("Beginning execution...", flush=True)
        # Launch task
        copts = ['launch', desc_local, invo_local]
        if kwargs.get("volumes"):
            copts += ['-v'] + kwargs.get("volumes")
        if kwargs.get("user"):
            copts += ['-u']

        start_time = time.time()
        self.provLaunch(copts, verbose=verbose, **kwargs)
        if verbose:
            print(self.output, flush=True)
        duration = time.time() - start_time

        # Resolve expected outputs and keep only those that actually exist.
        # (A prior re-read of the descriptor JSON here was dead code: its
        # result was immediately overwritten by this evaluation.)
        outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
        outputs_present = [f for f in outputs_all.values() if op.exists(f)]

        # Write memory/cpu stats to file
        usagef = "task-{}-usage.csv".format(self.task_id)
        self.cpu_ram_usage.to_csv(op.join(self.localtaskdir, usagef),
                                  sep=',',
                                  index=False)
        utils.post(op.join(self.localtaskdir, usagef), remotetaskdir)

        # Write stdout to file
        stdoutf = "task-{}-stdout.txt".format(self.task_id)
        with open(op.join(self.localtaskdir, stdoutf), "w") as fhandle:
            fhandle.write(self.output.stdout)
        utils.post(op.join(self.localtaskdir, stdoutf), remotetaskdir)

        # Write stderr to file
        stderrf = "task-{}-stderr.txt".format(self.task_id)
        with open(op.join(self.localtaskdir, stderrf), "w") as fhandle:
            fhandle.write(self.output.stderr)
        utils.post(op.join(self.localtaskdir, stderrf), remotetaskdir)

        # Summarize execution: timing, exit status, artifact locations.
        start_time = datetime.fromtimestamp(mktime(localtime(start_time)))
        summary = {
            "duration": duration,
            "launchtime": str(start_time),
            "exitcode": self.output.exit_code,
            "outputs": [],
            "usage": op.join(remotetaskdir, usagef),
            "stdout": op.join(remotetaskdir, stdoutf),
            "stderr": op.join(remotetaskdir, stderrf)
        }

        if not kwargs.get("local"):
            if verbose:
                print("Uploading outputs...", flush=True)
            # Push outputs
            for local_output in outputs_present:
                if verbose:
                    print("{} --> {}".format(local_output, output_loc),
                          flush=True)
                tmpouts = utils.post(local_output, output_loc)
                print(tmpouts)
                summary["outputs"] += tmpouts
        else:
            if verbose:
                print("Skipping uploading outputs (local execution)...",
                      flush=True)
            summary["outputs"] = outputs_present

        summarf = "task-{}-summary.json".format(self.task_id)
        with open(op.join(self.localtaskdir, summarf), "w") as fhandle:
            fhandle.write(json.dumps(summary, indent=4, sort_keys=True) + "\n")
        utils.post(op.join(self.localtaskdir, summarf), remotetaskdir)

        # If not local, delete all: inputs, outputs, and summaries
        if not kwargs.get("local"):
            for local_output in outputs_present:
                utils.remove(local_output)
            utils.remove(self.localtaskdir)
            for local_input in local_input_data:
                utils.remove(local_input)