def test_evalinput(self):
    self.set_examples()
    query = bosh.evaluate(self.desc, self.invo, "inputs/")
    expect = {
        'str_input_list': ["fo '; echo FAIL", 'bar'],
        'str_input': 'coin;plop',
        'config_num': 4,
        'num_input': None,
        'file_input': './setup.py',
        'enum_input': 'val1',
        'list_int_input': [1, 2, 3],
        'flag_input': None
    }
    assert query == expect

    # Several queries at once return a list with one result dict each.
    query = bosh.evaluate(self.desc, self.invo,
                          "inputs/type=Flag,id=flag_input",
                          "inputs/type=Number")
    expect = [{
        'flag_input': None
    }, {
        'config_num': 4,
        'num_input': None,
        'list_int_input': [1, 2, 3]
    }]
    assert query == expect

    # A filter that matches no input yields an empty dict.
    query = bosh.evaluate(self.desc, self.invo, "inputs/id=strinputs")
    expect = {}
    assert query == expect

    # So does a query against a nonexistent endpoint.
    query = bosh.evaluate(self.desc, self.invo, "inputt/nonsense=strinputs")
    expect = {}
    assert query == expect
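# The test above exercises bosh.evaluate's query grammar: an endpoint such as
# "inputs/", "groups/", or "output-files/", optionally followed by
# comma-separated key=value filters; passing several queries returns a list
# with one result dict per query. Below is a minimal standalone sketch, not
# part of the original tests: the function name is introduced here for
# illustration, and "descriptor.json" / "invocation.json" stand in for any
# valid Boutiques descriptor/invocation pair on disk.
def example_evaluate_queries():
    import boutiques as bosh

    # Single query: one dict mapping input ids to evaluated values.
    all_inputs = bosh.evaluate("descriptor.json", "invocation.json",
                               "inputs/")
    # Multiple queries: a list with one dict per query, in order.
    flags, numbers = bosh.evaluate("descriptor.json", "invocation.json",
                                   "inputs/type=Flag", "inputs/type=Number")
    return all_inputs, flags, numbers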
def test_evalgroups(self):
    self.set_examples()
    query = bosh.evaluate(self.desc, self.invo, "groups/")
    expect = {'an_example_group': {'num_input': None, 'enum_input': 'val1'}}
    self.assertEqual(query, expect)

    query = bosh.evaluate(self.desc, self.invo,
                          "groups/mutually-exclusive=True")
    expect = {'an_example_group': {'num_input': None, 'enum_input': 'val1'}}
    self.assertEqual(query, expect)
def test_evaloutput_from_zenodo(self, mock_get):
    self.set_examples_from_zenodo()
    query = bosh.evaluate(self.desc, self.invo, "output-files/")
    expect = {'logfile': 'log-4-coin;plop.txt',
              'output_files': 'output/*_exampleOutputTag.resultType',
              'config_file': './config.txt'}
    self.assertEqual(query, expect)

    query = bosh.evaluate(self.desc, self.invo, "output-files/id=logfile")
    expect = {'logfile': 'log-4-coin;plop.txt'}
    self.assertEqual(query, expect)

    query = bosh.evaluate(self.desc, self.invo, "output-files/id=log-file")
    expect = {}
    self.assertEqual(query, expect)
def test_evaloutput_json_obj(self):
    self.set_examples_json_obj()
    query = bosh.evaluate(self.desc, self.invo, "output-files/")
    expect = {'logfile': 'log-4-coin;plop.txt',
              'output_files': 'output/*_exampleOutputTag.resultType',
              'config_file': './subdir1/subdir2/config.txt'}
    self.assertEqual(query, expect)

    query = bosh.evaluate(self.desc, self.invo, "output-files/id=logfile")
    expect = {'logfile': 'log-4-coin;plop.txt'}
    self.assertEqual(query, expect)

    query = bosh.evaluate(self.desc, self.invo, "output-files/id=log-file")
    expect = {}
    self.assertEqual(query, expect)
def test_evaloutput(self):
    self.set_examples()
    query = bosh.evaluate(self.desc, self.invo, "output-files/")
    expect = {'logfile': 'log-4-coin;plop.txt',
              'output_files': 'output/*_exampleOutputTag.resultType',
              'config_file': './config.txt'}
    assert query == expect

    query = bosh.evaluate(self.desc, self.invo, "output-files/id=logfile")
    expect = {'logfile': 'log-4-coin;plop.txt'}
    assert query == expect

    query = bosh.evaluate(self.desc, self.invo, "output-files/id=log-file")
    expect = {}
    assert query == expect
def test_evaloutput(self):
    example1_dir = os.path.join(self.get_examples_dir(), "example1")
    desc = os.path.join(example1_dir, "example1_docker.json")
    invo = os.path.join(example1_dir, "invocation.json")
    query = bosh.evaluate(desc, invo, "invalid-query")
    out, err = self.capfd.readouterr()
    self.assertIn("[ ERROR ]", out)
def test(descriptor, test, invocation):
    # Run pipeline.
    ret = bosh.execute("launch", descriptor, invocation.name,
                       "--skip-data-collection")
    print(ret)

    # Choose the appropriate assertion scenario.
    assertions = test["assertions"]

    if "exit-code" in assertions:
        assert ret.exit_code == assertions["exit-code"]

    if "output-files" in assertions:
        # Acquire a hash map of output ids mapping to output file paths.
        outputted = bosh.evaluate(descriptor, invocation.name,
                                  "output-files/")

        for output_file in assertions["output-files"]:
            file_path = outputted[output_file["id"]]
            assert op.exists(file_path)

            # Optionally, an md5 reference may have been specified.
            if "md5-reference" in output_file:
                # MD5 checksum comparison
                output_md5 = compute_md5(file_path)
                reference_md5 = output_file["md5-reference"]
                assert output_md5 == reference_md5
def test(descriptor, test, invocation, paramsDict):
    arguments = ["launch", descriptor, invocation.name]

    # Add any additional params to the arguments.
    for flag, value in paramsDict.items():
        arguments.append(flag)
        if value is not None:
            arguments.append(value)
    print(arguments)

    # Run pipeline.
    ret = bosh.execute(*arguments)
    print(ret)

    # Choose the appropriate assertion scenario.
    assertions = test["assertions"]

    if "exit-code" in assertions:
        assert ret.exit_code == assertions["exit-code"]

    if "output-files" in assertions:
        # Acquire a hash map of output ids mapping to output file paths.
        outputted = bosh.evaluate(descriptor, invocation.name,
                                  "output-files/")

        for output_file in assertions["output-files"]:
            file_path = outputted[output_file["id"]]
            assert op.exists(file_path)

            # Optionally, an md5 reference may have been specified.
            if "md5-reference" in output_file:
                # MD5 checksum comparison
                output_md5 = compute_md5(file_path)
                reference_md5 = output_file["md5-reference"]
                assert output_md5 == reference_md5
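# Both test helpers above call compute_md5, which is not defined in these
# snippets. A minimal sketch of such a helper, assuming it only needs to
# return the hex MD5 digest of a file's contents (the name and single-path
# signature are taken from the call sites; the chunked read is an
# assumption):
import hashlib

def compute_md5(file_path, chunk_size=8192):
    # Hash the file in fixed-size chunks so large outputs need not be
    # read into memory at once.
    md5 = hashlib.md5()
    with open(file_path, "rb") as fhandle:
        for chunk in iter(lambda: fhandle.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()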
def generate_tests(self):
    # Get descriptor as JSON
    desc_JSON = json.loads(self.db_desc.data_file.read())

    # Create new test entries
    test_list = []
    for test_JSON in desc_JSON['tests']:
        test = DescriptorTest()
        test.test_name = test_JSON['name']
        test.descriptor = self.db_desc
        test_list.append(test_JSON['name'])

        # Evaluate the descriptor's command line, using the invocation
        # specified by the test. To perform this evaluation, we need to
        # extract the invocation from the test and put it into a
        # temporary file.
        invocation_tmp_file = create_temporary_file(
            json.dumps(test_JSON['invocation']).encode())

        # If the invocation is erroneous, we simply note it in the entry
        # itself; an invalid invocation should not halt the evaluation of
        # the entire descriptor.
        erroneous_invocation = False
        try:
            bosh.invocation(self.db_desc.data_file.file.name, '-i',
                            invocation_tmp_file.name)
        except Exception:
            erroneous_invocation = True

        # Rewind
        invocation_tmp_file.seek(0)

        if erroneous_invocation:
            test.evaluated_invocation = "Error: invalid invocation"
        else:
            # test.evaluated_invocation = bosh.evaluate(
            #     self.db_desc.data_file.file.name,
            #     invocation_tmp_file.name, "command-line/")
            test.evaluated_invocation = get_bosh_cmdline(
                self.db_desc.data_file.file.name, invocation_tmp_file.name)
        test.save()
        invocation_tmp_file.close()

        # Create assertion entries.
        if test_JSON['assertions'].get('exit-code') is not None:
            # Create assertion entry with exit-code
            assertion = DescriptorTestAssertion()
            assertion.test = test
            assertion.operand1 = test_JSON['assertions']['exit-code']
            assertion.type = ASSERTION_EXITCODE
            assertion.save()

        output_files = None
        if not erroneous_invocation:
            output_files = bosh.evaluate(self.db_desc.data_file.file.name,
                                         invocation_tmp_file.name,
                                         "output-files/")

        if test_JSON['assertions'].get('output-files') is not None:
            for output_assertion_JSON in test_JSON['assertions'][
                    'output-files']:
                assertion = DescriptorTestAssertion()
                assertion.test = test

                # Id processing
                id = output_assertion_JSON['id']
                if erroneous_invocation:
                    # Skip the evaluation of this entry because the
                    # associated invocation is invalid.
                    assertion.operand1 = "Cannot evaluate: invocation invalid"
                else:
                    assertion.operand1 = output_files[id]

                # MD5 reference processing
                if output_assertion_JSON.get('md5-reference') is not None:
                    assertion.operand2 = output_assertion_JSON[
                        'md5-reference']
                    assertion.type = ASSERTION_OUTPUT_FILE_MATCHES_MD5
                else:
                    assertion.type = ASSERTION_OUTPUT_FILE_EXISTS
                assertion.save()

    # We are done filling up the test entries.
    self.db_desc.data_file.seek(0)
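# generate_tests above relies on create_temporary_file, also not defined in
# this snippet. A plausible sketch, assuming it wraps
# tempfile.NamedTemporaryFile with delete=False (which would explain why the
# file can still be read by name after close() in the caller); the real
# helper's exact semantics are an assumption:
import tempfile

def create_temporary_file(content):
    # Write `content` (bytes) to a named temporary file and return the
    # open handle; the caller uses both .name and .seek().
    tmp = tempfile.NamedTemporaryFile(delete=False)
    tmp.write(content)
    tmp.flush()
    tmp.seek(0)
    return tmp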
def processTask(metadata, clowdrloc=None, verbose=False, **kwargs):
    # Get metadata
    if clowdrloc is None:
        localtaskdir = "/clowtask/"
    else:
        localtaskdir = clowdrloc

    localtaskdir = op.join(localtaskdir, "clowtask_" + utils.randstring(3))
    if not op.exists(localtaskdir):
        os.makedirs(localtaskdir)

    if verbose:
        print("Fetching metadata...")
    remotetaskdir = op.dirname(metadata)
    metadata = utils.get(metadata, localtaskdir)[0]
    task_id = metadata.split('.')[0].split('-')[-1]
    # The above grabs an ID from the form: fname-#.ext

    # Parse metadata
    metadata = json.load(open(metadata))
    descriptor = metadata['tool']
    invocation = metadata['invocation']
    input_data = metadata['dataloc']
    output_loc = utils.truepath(metadata['taskloc'])

    if verbose:
        print("Fetching descriptor and invocation...")
    # Get descriptor and invocation
    desc_local = utils.get(descriptor, localtaskdir)[0]
    invo_local = utils.get(invocation, localtaskdir)[0]

    # Get input data, if running remotely
    if not kwargs.get("local") and \
       any(dl.startswith("s3://") for dl in input_data):
        if verbose:
            print("Fetching input data...")
        localdatadir = op.join(localtaskdir, "data")
        for dataloc in input_data:
            utils.get(dataloc, localdatadir)
        # Move to correct location
        os.chdir(localdatadir)
    else:
        if verbose:
            print("Skipping data fetch (local execution)...")
        if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
            os.chdir(kwargs["workdir"])

    if verbose:
        print("Beginning execution...")

    # Launch task
    start_time = time.time()
    if kwargs.get("volumes"):
        volumes = " ".join(kwargs.get("volumes"))
        bosh_output = bosh.execute('launch', desc_local, invo_local,
                                   '-v', volumes)
    else:
        bosh_output = bosh.execute('launch', desc_local, invo_local)
    if verbose:
        print(bosh_output)
    duration = time.time() - start_time

    # Get list of bosh exec outputs that actually exist on disk
    outputs_present = []
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    for outfile in outputs_all.values():
        outputs_present += [outfile] if op.exists(outfile) else []

    # Write stdout to file
    stdoutf = "stdout-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stdoutf), "w") as fhandle:
        fhandle.write(bosh_output.stdout)
    utils.post(op.join(localtaskdir, stdoutf), remotetaskdir)

    # Write stderr to file
    stderrf = "stderr-{}.txt".format(task_id)
    with open(op.join(localtaskdir, stderrf), "w") as fhandle:
        fhandle.write(bosh_output.stderr)
    utils.post(op.join(localtaskdir, stderrf), remotetaskdir)

    # Write summary values to file
    summary = {
        "duration": duration,
        "exitcode": bosh_output.exit_code,
        "outputs": [],
        "stdout": op.join(remotetaskdir, stdoutf),
        "stderr": op.join(remotetaskdir, stderrf)
    }

    if not kwargs.get("local"):
        if verbose:
            print("Uploading outputs...")
        # Push outputs
        for local_output in outputs_present:
            if verbose:
                print("{} --> {}".format(local_output, output_loc))
            summary["outputs"] += utils.post(local_output, output_loc)
    else:
        if verbose:
            print("Skipping uploading outputs (local execution)...")
        summary["outputs"] = outputs_present

    summarf = "summary-{}.json".format(task_id)
    with open(op.join(localtaskdir, summarf), "w") as fhandle:
        fhandle.write(json.dumps(summary) + "\n")
    utils.post(op.join(localtaskdir, summarf), remotetaskdir)
def test_example3_filepathrenaming(self):
    self.assertEqual(
        bosh.evaluate(self.get_file_path("example3.json"),
                      self.get_file_path("invocation.json"),
                      "output-files/"),
        {'logfile': 'log-FileValue.txt'})
def process_task(metadata):
    # Get metadata
    local_task_dir = "/task/"
    print("Fetching metadata...")
    metadata = get(metadata, local_task_dir)[0]

    # Parse metadata
    metadata = json.load(open(metadata))
    descriptor = metadata['descriptor']
    invocation = metadata['invocation']
    input_data = metadata['input_data']
    output_loc = metadata['output_loc']

    print("Fetching descriptor and invocation...")
    # Get descriptor and invocation
    desc_local = get(descriptor, local_task_dir)[0]
    invo_local = get(invocation, local_task_dir)[0]
    task_loc = op.dirname(invocation)
    invo_id = invo_local.split('.')[0].split('-')[-1]
    # The above grabs an ID from the form: fname-#.ext

    print("Fetching input data...")
    # Get input data
    local_data_dir = "/clowdata/"
    for dataloc in input_data:
        get(dataloc, local_data_dir)

    # Move to correct location
    os.chdir(local_data_dir)

    print("Beginning execution...")
    # Launch task
    try:
        std = bosh.execute('launch', desc_local, invo_local)
        # graph_dir = '{}clowprov/'.format(local_data_dir)
        # graph_name = '{}clowdrgraph-{}.rpz'.format(graph_dir, invo_id)
        # cmd = 'reprozip trace -w --dir={} bosh exec launch {} {}'
        # os.system(cmd.format(graph_dir, desc_local, invo_local))
        # cmd = 'reprozip pack --dir={} {}'
        # os.system(cmd.format(graph_dir, graph_name))
        # print("{} --> {}".format(graph_name,
        #                          op.join(task_loc, op.basename(graph_name))))
        # post(graph_name, op.join(task_loc, op.basename(graph_name)))
    except ImportError:
        print("(Reprozip not installed, no provenance tracing)")
        std = bosh.execute('launch', desc_local, invo_local)

    # Get list of bosh exec outputs that actually exist on disk
    outputs_present = []
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    for outfile in outputs_all.values():
        outputs_present += [outfile] if op.exists(outfile) else []

    print("Uploading outputs...")
    # Push outputs
    for local_output in outputs_present:
        print("{} --> {}".format(local_output, output_loc))
        post(local_output, output_loc)
def manageTask(self, taskfile, provdir=None, verbose=False, **kwargs):
    # Get metadata
    if provdir is None:
        self.localtaskdir = "/clowtask/"
    else:
        self.localtaskdir = provdir

    # The below grabs an ID from the form: /some/path/to/fname-#.ext
    self.task_id = taskfile.split('.')[0].split('-')[-1]

    self.localtaskdir = op.join(self.localtaskdir,
                                "clowtask_" + self.task_id)
    if not op.exists(self.localtaskdir):
        os.makedirs(self.localtaskdir)

    if verbose:
        print("Fetching metadata...", flush=True)
    remotetaskdir = op.dirname(taskfile)
    taskfile = utils.get(taskfile, self.localtaskdir)[0]

    # Parse metadata
    taskinfo = json.load(open(taskfile))
    descriptor = taskinfo['tool']
    invocation = taskinfo['invocation']
    input_data = taskinfo['dataloc']
    output_loc = utils.truepath(taskinfo['taskloc'])

    if verbose:
        print("Fetching descriptor and invocation...", flush=True)
    # Get descriptor and invocation
    desc_local = utils.get(descriptor, self.localtaskdir)[0]
    invo_local = utils.get(invocation, self.localtaskdir)[0]

    # Get input data, if running remotely. Initialize the list up front so
    # the cleanup below cannot hit a NameError on the local path.
    local_input_data = []
    if not kwargs.get("local") and \
       any(dl.startswith("s3://") for dl in input_data):
        if verbose:
            print("Fetching input data...", flush=True)
        localdatadir = op.join("/data")
        for dataloc in input_data:
            local_input_data += utils.get(dataloc, localdatadir)
        # Move to correct location
        os.chdir(localdatadir)
    else:
        if verbose:
            print("Skipping data fetch (local execution)...", flush=True)
        if kwargs.get("workdir") and op.exists(kwargs.get("workdir")):
            os.chdir(kwargs["workdir"])

    if verbose:
        print("Beginning execution...", flush=True)

    # Launch task
    copts = ['launch', desc_local, invo_local]
    if kwargs.get("volumes"):
        copts += ['-v'] + kwargs.get("volumes")
    if kwargs.get("user"):
        copts += ['-u']
    start_time = time.time()
    self.provLaunch(copts, verbose=verbose, **kwargs)
    if verbose:
        print(self.output, flush=True)
    duration = time.time() - start_time

    # Get list of bosh exec outputs that actually exist on disk
    outputs_present = []
    outputs_all = bosh.evaluate(desc_local, invo_local, 'output-files/')
    for outfile in outputs_all.values():
        outputs_present += [outfile] if op.exists(outfile) else []

    # Write memory/cpu stats to file
    usagef = "task-{}-usage.csv".format(self.task_id)
    self.cpu_ram_usage.to_csv(op.join(self.localtaskdir, usagef),
                              sep=',', index=False)
    utils.post(op.join(self.localtaskdir, usagef), remotetaskdir)

    # Write stdout to file
    stdoutf = "task-{}-stdout.txt".format(self.task_id)
    with open(op.join(self.localtaskdir, stdoutf), "w") as fhandle:
        fhandle.write(self.output.stdout)
    utils.post(op.join(self.localtaskdir, stdoutf), remotetaskdir)

    # Write stderr to file
    stderrf = "task-{}-stderr.txt".format(self.task_id)
    with open(op.join(self.localtaskdir, stderrf), "w") as fhandle:
        fhandle.write(self.output.stderr)
    utils.post(op.join(self.localtaskdir, stderrf), remotetaskdir)

    start_time = datetime.fromtimestamp(mktime(localtime(start_time)))
    summary = {
        "duration": duration,
        "launchtime": str(start_time),
        "exitcode": self.output.exit_code,
        "outputs": [],
        "usage": op.join(remotetaskdir, usagef),
        "stdout": op.join(remotetaskdir, stdoutf),
        "stderr": op.join(remotetaskdir, stderrf)
    }

    if not kwargs.get("local"):
        if verbose:
            print("Uploading outputs...", flush=True)
        # Push outputs
        for local_output in outputs_present:
            if verbose:
                print("{} --> {}".format(local_output, output_loc),
                      flush=True)
            tmpouts = utils.post(local_output, output_loc)
            print(tmpouts)
            summary["outputs"] += tmpouts
    else:
        if verbose:
            print("Skipping uploading outputs (local execution)...",
                  flush=True)
        summary["outputs"] = outputs_present

    summarf = "task-{}-summary.json".format(self.task_id)
    with open(op.join(self.localtaskdir, summarf), "w") as fhandle:
        fhandle.write(json.dumps(summary, indent=4, sort_keys=True) + "\n")
    utils.post(op.join(self.localtaskdir, summarf), remotetaskdir)

    # If not local, delete everything: inputs, outputs, and summaries
    if not kwargs.get("local"):
        for local_output in outputs_present:
            utils.remove(local_output)
        utils.remove(self.localtaskdir)
        for local_input in local_input_data:
            utils.remove(local_input)