def _checkPulblishable(self, files_list):
    """Filter cached record files down to those that can be published.

    A record can be published when its 'descriptor-doi' field is a real
    DOI, or when it points at a cached descriptor file that has itself
    been published (in which case the record is updated in place with
    the descriptor's DOI).

    Args:
        files_list: iterable of record file names inside self.cache_dir.

    Returns:
        dict {filename: content_dict} of publishable records.
    """
    # Commands the user must run first for unpublished descriptors.
    desc_to_publish = set()
    # dict {filename: content_dict}
    publishable_dict = {}
    for fl in files_list:
        fl_path = os.path.join(self.cache_dir, fl)
        fl_dict = loadJson(fl_path)
        doi = fl_dict.get('summary').get('descriptor-doi')
        # Descriptor is not publish, record contains link to file
        # (value appears to be a cache filename prefixed "descriptor_"
        # rather than a DOI — TODO confirm naming convention)
        if doi.split("_")[0] == "descriptor":
            desc_path = os.path.join(self.cache_dir, doi)
            desc_dict = loadJson(desc_path)
            # Descriptor is published, record needs to be updated
            if desc_dict.get('doi') is not None:
                fl_dict['summary']['descriptor-doi'] = desc_dict['doi']
                publishable_dict[fl] = fl_dict
            # Descriptor isn't published, inform user with full prompt
            else:
                print("Record {0} cannot be published as its descriptor "
                      "is not yet published. ".format(fl))
                desc_to_publish.add("bosh publish {}".format(desc_path))
        # Descriptor doi is stored correctly in record
        else:
            publishable_dict[fl] = fl_dict
    # Prompt user to publish descriptors
    if len(desc_to_publish) != 0:
        print("Some descriptors have not been published, they can be "
              "published with following commands:")
        for prompt in desc_to_publish:
            print("\t" + prompt)
    return publishable_dict
def invocation(*params):
    """Create and/or apply an invocation schema for a descriptor.

    Validates the descriptor, reuses its embedded invocation schema when
    present (generating one otherwise), optionally writes the schema back
    into the descriptor file, and optionally validates an invocation
    against the schema.
    """
    parser = ArgumentParser("Creates invocation schema and validates"
                            " invocations. Uses descriptor's invocation"
                            " schema if it exists, otherwise creates one.")
    parser.add_argument("descriptor", action="store",
                        help="The Boutiques descriptor as a JSON file, JSON "
                        "string or Zenodo ID (prefixed by 'zenodo.').")
    parser.add_argument("-i", "--invocation", action="store",
                        help="Input values in a JSON file or as a JSON "
                        "object to be validated against "
                        "the invocation schema.")
    parser.add_argument("-w", "--write-schema", action="store_true",
                        help="If descriptor doesn't have an invocation "
                        "schema, creates one and writes it to the descriptor"
                        " file ")
    args = parser.parse_args(params)

    validate(args.descriptor)
    descriptor = loadJson(args.descriptor)

    # Prefer the schema already embedded in the descriptor.
    inv_schema = descriptor.get("invocation-schema")
    if not inv_schema:
        from boutiques.invocationSchemaHandler import generateInvocationSchema
        inv_schema = generateInvocationSchema(descriptor)

    if args.write_schema:
        descriptor["invocation-schema"] = inv_schema
        with open(args.descriptor, "w") as fhandle:
            fhandle.write(json.dumps(descriptor, indent=4, sort_keys=True))

    if args.invocation:
        from boutiques.invocationSchemaHandler import validateSchema
        validateSchema(inv_schema,
                       addDefaultValues(descriptor, loadJson(args.invocation)))
def test(*params):
    """Run every test defined in a descriptor through pytest.

    Returns 0 when the descriptor defines no tests; otherwise returns
    pytest's exit status.
    """
    parser = ArgumentParser("Perform all the tests defined within the"
                            " given descriptor")
    parser.add_argument("descriptor", action="store",
                        help="The Boutiques descriptor as a JSON file, JSON "
                        "string or Zenodo ID (prefixed by 'zenodo.').")
    args = parser.parse_args(params)

    # Generation of the invocation schema (and descriptor validation).
    invocation(args.descriptor)

    # Extraction of all the invocations defined for the test-cases.
    descriptor = loadJson(args.descriptor)
    if not descriptor.get("tests"):
        # If no tests have been specified, we consider testing successful.
        return 0

    # Check every test-case invocation before launching anything.
    for case in descriptor["tests"]:
        invocation(args.descriptor, "--invocation",
                   json.dumps(case["invocation"]))

    # Invocations validated: run the actual tests.
    test_path = op.join(op.dirname(op.realpath(__file__)), "test.py")
    return pytest.main([test_path, "--descriptor", args.descriptor])
def pprint(*params):
    """Pretty-print a descriptor and return the generated help text."""
    args = parser_pprint().parse_args(params)
    from boutiques.prettyprint import PrettyPrinter
    descriptor = loadJson(args.descriptor, sandbox=args.sandbox)
    return PrettyPrinter(descriptor).docstring
def invocation(*params):
    """Create and/or apply an invocation schema for a descriptor.

    Validates the descriptor, reuses its embedded invocation schema when
    present (generating one otherwise), optionally writes the schema back
    into the descriptor file, and optionally validates an invocation
    against the schema.
    """
    args = parser_invocation().parse_args(params)

    validate(args.descriptor)
    descriptor = loadJson(args.descriptor)

    # Prefer the schema already embedded in the descriptor.
    inv_schema = descriptor.get("invocation-schema")
    if not inv_schema:
        from boutiques.invocationSchemaHandler import generateInvocationSchema
        inv_schema = generateInvocationSchema(descriptor)

    if args.write_schema:
        descriptor["invocation-schema"] = inv_schema
        with open(args.descriptor, "w") as fhandle:
            fhandle.write(json.dumps(descriptor, indent=4, sort_keys=True))

    if args.invocation:
        from boutiques.invocationSchemaHandler import validateSchema
        validateSchema(inv_schema,
                       addDefaultValues(descriptor, loadJson(args.invocation)))
def prettyprint(*params):
    """Generate and return pretty-printed help text for a descriptor."""
    parser = ArgumentParser("Boutiques pretty-print for generating help text")
    parser.add_argument("descriptor", action="store",
                        help="The Boutiques descriptor.")
    args = parser.parse_args(params)
    from boutiques.prettyprint import PrettyPrinter
    return PrettyPrinter(loadJson(args.descriptor)).docstring
def mock_download_deprecated(url, file_path):
    """Mock the download-and-save of a deprecated descriptor.

    Loads the bundled example1 Docker descriptor, marks it as deprecated
    by setting 'deprecated-by-doi', writes it to file_path, and returns
    the written path in a list (mimicking the real download helper).

    Args:
        url: ignored; present to match the mocked function's signature.
        file_path: destination path for the deprecated descriptor JSON.

    Returns:
        Single-element list containing the written file's name.
    """
    # Fixed: the original wrapped os.path.join around itself redundantly
    # and computed an unused cache_dir local.
    example_1_path = os.path.join(os.path.dirname(bfile), "schema",
                                  "examples", "example1",
                                  "example1_docker.json")
    example_1_json = loadJson(example_1_path)
    example_1_json['deprecated-by-doi'] = "a_doi"
    with open(file_path, 'w') as f:
        f.write(json.dumps(example_1_json))
    return [f.name]
def test_success_template_camel_case(self):
    """A descriptor created with --camel-case must contain no snake_case ids."""
    template_path = './boutiques/templates/basic.json'
    output_path = 'creator_output.json'
    bosh(['create', output_path, '--camel-case'])
    self.assertIsNone(bosh(['validate', output_path]))
    created = loadJson(output_path)
    original = loadJson(template_path)
    # Input ids: snake_case in the template, camelCase in the output.
    for snake_inp, camel_inp in zip(original['inputs'], created['inputs']):
        self.assertTrue("_" in snake_inp['id'])
        self.assertFalse("_" in camel_inp['id'])
    # Group member ids follow the same convention.
    for snake_grp, camel_grp in zip(original['groups'], created['groups']):
        self.assertTrue(all(("_" in m) for m in snake_grp['members']))
        self.assertFalse(all(("_" in m) for m in camel_grp['members']))
def invocation(*params):
    """Create and/or apply an invocation schema for a descriptor.

    Sandbox-aware variant: forwards --sandbox to validation and to
    loadJson, reuses the descriptor's embedded invocation schema when
    present, optionally writes the schema back, and optionally validates
    an invocation against it.
    """
    opts = parser_invocation().parse_args(params)

    validate_args = [opts.descriptor]
    if opts.sandbox:
        validate_args.append('--sandbox')
    validate(*validate_args)

    descriptor = loadJson(opts.descriptor, sandbox=opts.sandbox)

    # Prefer the schema already embedded in the descriptor.
    inv_schema = descriptor.get("invocation-schema")
    if not inv_schema:
        from boutiques.invocationSchemaHandler import generateInvocationSchema
        inv_schema = generateInvocationSchema(descriptor)

    if opts.write_schema:
        descriptor["invocation-schema"] = inv_schema
        with open(opts.descriptor, "w") as fhandle:
            fhandle.write(json.dumps(descriptor, indent=4))

    if opts.invocation:
        from boutiques.invocationSchemaHandler import validateSchema
        validateSchema(inv_schema,
                       addDefaultValues(descriptor, loadJson(opts.invocation)))
def function(descriptor):
    '''
    Returns a function to invoke bosh.execute on a descriptor.

    The returned function is named after the tool name in the descriptor
    and carries a generated docstring describing its Boutiques interface.

    args:
        descriptor: Zenodo id, file name, or JSON string representing a
            descriptor.
    '''
    validate(descriptor)
    descriptor_json = loadJson(descriptor)

    def f(*args, **kwargs):
        # Set default mode to 'launch'
        if len(args) > 0:
            mode = args[0]
        else:
            mode = 'launch'
        if mode not in ['launch', 'simulate']:
            # NOTE(review): an unrecognized first arg falls back to
            # 'launch' but is NOT consumed — it is forwarded to execute
            # below. Presumably intentional pass-through; confirm.
            mode = 'launch'
        else:
            args = args[1:]
        # Call bosh execute
        if mode == 'launch':
            # 'launch' passes kwargs as a positional JSON invocation.
            return execute(mode, descriptor, json.dumps(kwargs), *args)
        if len(kwargs) > 0:
            # 'simulate' takes the invocation via the -i flag instead.
            return execute(mode, descriptor, '-i', json.dumps(kwargs), *args)
        return execute(mode, descriptor, *args)

    # Name the generated function after the tool.
    f.__name__ = str(descriptor_json['name'])

    # Documentation: generated usage text plus the descriptor's
    # pretty-printed help.
    doc = []
    doc.append(r'''Runs {0} through its Boutiques interface.

    *args:
        - mode: 'launch' or 'simulate'. Defaults to 'launch'.
        - other arguments: will be passed to bosh execute.
          Examples: '-s', '-x'. See help(bosh.execute) for a complete list.

    *kwargs:
        {1} arguments as defined in the Boutiques descriptor,
        referenced from input ids. Example: {2}='some_value'.
        See complete list in descriptor help below.
    '''.format(f.__name__, f.__name__, descriptor_json['inputs'][0]['id']))
    doc.append(pprint(descriptor))
    f.__doc__ = ''.join(doc)
    return f
def fetch_tests(descriptor_input, paramsDict):
    """Collect the test cases defined in a descriptor.

    Each test's invocation is serialized into a temporary JSON file
    (kept on disk: delete=False) so downstream tooling can read it
    by path.

    Returns:
        Tuple of (descriptor name, list of [descriptor_input, test,
        temp invocation file, paramsDict] entries).
    """
    descriptor = loadJson(descriptor_input)
    collected = []
    for test_case in descriptor["tests"]:
        # Write the invocation to a temp file and rewind for readers.
        serialized = json.dumps(test_case["invocation"])
        invocation_file = tempfile.NamedTemporaryFile(suffix=".json",
                                                      delete=False)
        invocation_file.write(serialized.encode())
        invocation_file.seek(0)
        collected.append([descriptor_input, test_case, invocation_file,
                          paramsDict])
    return (descriptor["name"], collected)
def _clean_cache(self, records_dict):
    """Delete published records from the cache, then remove any cached
    descriptor files that no remaining record links to.

    Args:
        records_dict: dict of record file names to delete first.

    Side effects: updates self.record_files and self.descriptor_files,
    and deletes files via self.delete.
    """
    for record in records_dict.keys():
        self.delete(record, True)
    # List remaining records and collect descriptor-doi values
    self.record_files = [
        fl for fl in os.listdir(self.cache_dir)
        if fl not in self.descriptor_files
    ]
    doi_list = [
        loadJson(os.path.join(self.cache_dir,
                              fl)).get('summary').get('descriptor-doi')
        for fl in self.record_files
    ]
    # Check each descriptor in remaining records.
    # Fixed: iterate over a copy — the original removed items from
    # self.descriptor_files while iterating it, which skips the element
    # following each removal and could leave orphaned descriptors cached.
    for descriptor in list(self.descriptor_files):
        # No records link to descriptor
        if descriptor not in doi_list:
            self.delete(descriptor, True)
            self.descriptor_files.remove(descriptor)
def carmin(self, output_file):
    """Export the Boutiques descriptor to a CARMIN pipeline description.

    Args:
        output_file: path where the CARMIN JSON is written.

    Raises:
        ExportError (via raise_error) when neither the descriptor's DOI
        nor self.identifier provides an identifier.
    """
    carmin_desc = {}
    descriptor = loadJson(self.descriptor)
    # A DOI embedded in the descriptor takes precedence over --identifier.
    if descriptor.get('doi'):
        self.identifier = descriptor.get('doi')
    if self.identifier is None:
        raise_error(ExportError, 'Descriptor must have a DOI, or '
                    'identifier must be specified with --identifier.')
    carmin_desc['identifier'] = self.identifier
    carmin_desc['name'] = descriptor.get('name')
    carmin_desc['version'] = descriptor.get('tool-version')
    carmin_desc['description'] = descriptor.get('description')
    carmin_desc['canExecute'] = True
    carmin_desc['parameters'] = []
    for inp in descriptor.get('inputs', []):
        carmin_desc['parameters'].append(
            self.convert_input_or_output(inp, False))
    # Fixed: 'output-files' is optional in a Boutiques descriptor;
    # default to [] instead of iterating over None (TypeError).
    for output in descriptor.get('output-files', []):
        carmin_desc['parameters'].append(
            self.convert_input_or_output(output, True))
    carmin_desc['properties'] = {}
    carmin_desc['properties']['boutiques'] = True
    if descriptor.get('tags'):
        for prop in descriptor.get('tags').keys():
            carmin_desc['properties'][prop] = descriptor['tags'][prop]
    carmin_desc['errorCodesAndMessages'] = []
    # Fixed: 'error-codes' is likewise optional; previously a missing
    # key crashed this loop with a TypeError.
    for errors in descriptor.get('error-codes', []):
        obj = {}
        obj['errorCode'] = errors['code']
        obj['errorMessage'] = errors['description']
        carmin_desc['errorCodesAndMessages'].append(obj)
    with open(output_file, 'w') as fhandle:
        fhandle.write(json.dumps(carmin_desc, indent=4, sort_keys=True))
def test(*params):
    """Run every test defined in a descriptor through pytest.

    Returns 0 when the descriptor defines no tests; otherwise returns
    pytest's exit status.
    """
    args = parser_test().parse_args(params)

    # Generation of the invocation schema (and descriptor validation).
    invocation(args.descriptor)

    # Extraction of all the invocations defined for the test-cases.
    descriptor = loadJson(args.descriptor)
    if not descriptor.get("tests"):
        # If no tests have been specified, we consider testing successful.
        return 0

    # Check every test-case invocation before launching anything.
    for case in descriptor["tests"]:
        invocation(args.descriptor, "--invocation",
                   json.dumps(case["invocation"]))

    # Invocations validated: run the actual tests.
    test_path = op.join(op.dirname(op.realpath(__file__)), "test.py")
    return pytest.main([test_path, "--descriptor", args.descriptor])
def test(*params):
    """Run every test defined in a descriptor through pytest.

    Sandbox-aware variant: forwards --sandbox to invocation validation
    and loadJson, and forwards --imagepath to the pytest run. Returns 0
    when the descriptor defines no tests; otherwise returns pytest's
    exit status.
    """
    opts = parser_test().parse_args(params)

    invocation_args = [opts.descriptor]
    if opts.sandbox:
        invocation_args.append("--sandbox")
    # Generation of the invocation schema (and descriptor validation).
    invocation(*invocation_args)

    # Extraction of all the invocations defined for the test-cases.
    descriptor = loadJson(opts.descriptor, sandbox=opts.sandbox)
    if not descriptor.get("tests"):
        # If no tests have been specified, we consider testing successful.
        return 0

    # Check every test-case invocation before launching anything.
    for case in descriptor["tests"]:
        case_args = [opts.descriptor, "--invocation",
                     json.dumps(case["invocation"])]
        if opts.sandbox:
            case_args.append("--sandbox")
        invocation(*case_args)

    # Invocations validated: run the actual tests.
    test_path = op.join(op.dirname(op.realpath(__file__)), "test.py")
    pytest_args = [test_path, "--descriptor", opts.descriptor]
    if opts.imagepath:
        pytest_args.extend(["--imagepath", opts.imagepath])
    return pytest.main(args=pytest_args)
def upgrade_04(self):
    """Upgrade a 0.4 descriptor (self.input_descriptor) to schema 0.5,
    write it to self.output_descriptor, and validate the result.

    Differences between 0.4 and current (0.5):
      - schema version (obv)
      - singularity should now be represented same as docker
      - walltime should be part of suggested_resources structure

    I.e.
        "schema-version": "0.4",
    ...... becomes .....
        "schema-version": "0.5",

    I.e.
        "container-image": {
            "type": "singularity",
            "url": "shub://gkiar/ndmg-cbrain:master"
        },
    ...... becomes .....
        "container-image": {
            "type": "singularity",
            "image": "gkiar/ndmg-cbrain:master",
            "index": "shub://",
        },

    I.e.
        "walltime-estimate": 3600,
    ...... becomes .....
        "suggested-resources": {
            "walltime-estimate": 3600
        },
    """
    descriptor = loadJson(self.input_descriptor)

    # Refuse anything that is not exactly a 0.4 descriptor.
    if descriptor["schema-version"] != "0.4":
        raise_error(ImportError, "The input descriptor must have "
                    "'schema-version'=0.4")
    descriptor["schema-version"] = "0.5"

    if "container-image" in descriptor.keys():
        if "singularity" == descriptor["container-image"]["type"]:
            # Split "index://image" into its two parts; a bare image
            # name (no "://") keeps no index.
            url = descriptor["container-image"]["url"]
            img = url.split("://")
            if len(img) == 1:
                descriptor["container-image"]["image"] = img[0]
            elif len(img) == 2:
                descriptor["container-image"]["image"] = img[1]
                descriptor["container-image"]["index"] = img[0] + "://"
            del descriptor["container-image"]["url"]
        elif ("docker" == descriptor["container-image"]["type"]
              and descriptor["container-image"].get("index")):
            # Strip any scheme prefix from a docker index.
            url = descriptor["container-image"]["index"].split("://")[-1]
            descriptor["container-image"]["index"] = url

    if "walltime-estimate" in descriptor.keys():
        # Move the walltime under the new suggested-resources structure.
        descriptor["suggested-resources"] = \
            {"walltime-estimate": descriptor["walltime-estimate"]}
        del descriptor["walltime-estimate"]

    with open(self.output_descriptor, 'w') as fhandle:
        fhandle.write(json.dumps(descriptor, indent=4, sort_keys=True))
    validate_descriptor(self.output_descriptor)
def validate_descriptor(json_file, **kwargs):
    """
    Validates the Boutiques descriptor against the schema, then runs a
    battery of Boutiques-specific consistency checks (command-line keys,
    unique ids, inputs, groups, conditional outputs, tests).

    Args:
        json_file: path/string/id accepted by loadJson.
        **kwargs: 'format_output' — when truthy and validation succeeds,
            rewrites json_file with sorted, indented JSON.

    Returns:
        The loaded descriptor dict on success.

    Raises:
        DescriptorValidationError listing every failed check.
    """
    path, fil = op.split(bfile)
    schema_file = op.join(path, "schema", "descriptor.schema.json")

    # Load schema
    with open(schema_file) as fhandle:
        schema = json.load(fhandle)

    # Load input types according to the schema
    schema_types = schema['properties']['inputs']['items']['properties'][
        'type']['enum']
    allowed_keywords = ['and', 'or', 'false', 'true']
    allowed_comparators = ['==', '!=', '<', '>', '<=', '>=']

    # Load descriptor
    descriptor = loadJson(json_file)

    # Validate basic JSON schema compliance for descriptor
    # Note: if it fails basic schema compliance we don"t do more checks
    try:
        validate(descriptor, schema)
    except ValidationError as e:
        raise_error(DescriptorValidationError, (str(e)))

    # Helper get functions
    def safeGet(desc, sec, targ):
        # Collect desc[sec][*][targ] for items that define targ.
        if desc.get(sec):
            return [
                item.get(targ) for item in desc[sec]
                if list(item.keys()).count(targ)
            ]
        return []

    def inputGet(s):
        return safeGet(descriptor, "inputs", s)

    def outputGet(s):
        return safeGet(descriptor, "output-files", s)

    def groupGet(s):
        return safeGet(descriptor, "groups", s)

    def inById(i):
        # Look an input up by id; empty dict when not found.
        if i in inputGet("id"):
            return descriptor["inputs"][inputGet("id").index(i)]
        return {}

    def isValidConditionalExp(exp):
        # Return the type of a conditional expression's substring
        def getSubstringType(s):
            s = s.strip()
            if s in schema_types:
                # Can't realistically distinguish File from String
                return s if s != "File" else "String"
            elif re.search(r'^[0-9]*\.?[0-9]+$', s):
                return "Number"
            elif re.search(r'^(True|False|false|true)$', s):
                return "Flag"
            else:
                return "String"

        # Recursively check boolean expression by replacing variables with
        # their expected value type [Number, String, File, Flag]
        brackets, startIdx, endIdx = 0, 0, 0
        rebuiltExp = ""
        for idx, c in enumerate(exp):
            if c == '(':
                brackets += 1
                startIdx = idx + 1
            elif c == ')':
                brackets -= 1
                if brackets == 0:
                    endIdx = idx
                    isValidSubExp = isValidConditionalExp(
                        exp[startIdx:endIdx])
                    # Immediately return false if sub expression is
                    # not valid
                    if not isValidSubExp:
                        return False
                    else:
                        rebuiltExp += "{0}".format(isValidSubExp)
            elif brackets == 0:
                rebuiltExp += "{0}".format(c)
        rebuiltExp = rebuiltExp.strip()

        # If there are no more parentheses, check if sub-expression is
        # valid. rebuiltExp should only be two values separated by
        # operator: x >= y, or values separated by 'AND'/'OR':
        # x and y and z
        if '(' not in rebuiltExp and ')' not in rebuiltExp:
            expElements = rebuiltExp.split()
            for idx, e in enumerate(expElements):
                e = e.strip()
                # Compare types of elements neighbouring
                # allowed_comparators
                if e in allowed_comparators and\
                        getSubstringType(expElements[idx-1]) !=\
                        getSubstringType(expElements[idx+1]):
                    return False
                # Check if keyword element is part of allowed keywords
                if keyword.iskeyword(e) and\
                        e.lower() not in allowed_keywords:
                    return False
                # Check elements neighbouring and/or keywords are
                # valid words
                if e.lower() in ["and", "or"] and\
                        (expElements[idx - 1] not in
                         schema_types + ["True", "False"] or
                         expElements[idx + 1] not in
                         schema_types + ["True", "False"]):
                    return False
        return True

    # Begin looking at Boutiques-specific failures
    errors = []

    clkeys = inputGet("value-key") + outputGet("value-key")
    flattenedTemplates = [y for x in outputGet("file-template") for y in x]
    configFileTemplates = flattenedTemplates + outputGet("path-template")

    cmdline = descriptor["command-line"]

    # Verify that all command-line key appear in the command-line, in
    # a file template or in an environment variable value
    msg_template = (" KeyError: \"{0}\" not in command-line or file template"
                    " or environment variables")
    envValues = ""
    if descriptor.get('environment-variables'):
        for env in descriptor.get('environment-variables'):
            envValues += "||" + env['value']
    errors += [
        msg_template.format(k) for k in clkeys
        if ((cmdline + ".".join(configFileTemplates) + envValues).count(k))
        < 1
    ]

    # Verify that no key contains another key
    msg_template = " KeyError: \"{0}\" contains \"{1}\""
    errors += [
        msg_template.format(key, clkeys[jdx])
        for idx, key in enumerate(clkeys)
        for jdx in range(0, len(clkeys))
        if clkeys[jdx] in key and key != clkeys[jdx]
    ]

    # Verify that all Ids are unique
    inIds, outIds = inputGet("id"), outputGet("id")
    grpIds = groupGet("id") if "groups" in descriptor.keys() else []
    allIds = inIds + outIds + grpIds
    msg_template = " IdError: \"{0}\" is non-unique"
    for idx, s1 in enumerate(allIds):
        for jdx, s2 in enumerate(allIds):
            if s1 == s2 and idx < jdx:
                errors += [msg_template.format(s1)]
            else:
                errors += []

    # Verify that identical keys only exist if they are both in
    # mutex groups
    msg_template = " MutExError: \"{0}\" belongs to 2+ non exclusive IDs"
    for idx, key in enumerate(clkeys):
        for jdx in range(idx + 1, len(clkeys)):
            if clkeys[jdx] == key:
                mids = [
                    inById(mid)["id"] for mid in inIds
                    if inById(mid)["value-key"] == key
                ]
                # NOTE(review): assumes "groups" is present when keys
                # are shared — descriptor.get("groups") would be None
                # otherwise; confirm against schema expectations.
                for idx, grp in enumerate(descriptor.get("groups")):
                    mutex = grp.get("mutually-exclusive")
                    if set(grp["members"]) == set(mids) and not mutex:
                        errors += [msg_template.format(key)]

    # Verify that output files have unique path-templates
    msg_template = ("OutputError: \"{0}\" and \"{1}\" have the same "
                    "path-template")
    for ix, o1 in zip(outputGet("id"), outputGet("path-template")):
        for jx, o2 in zip(outputGet("id"), outputGet("path-template")):
            if o1 == o2 and jx != ix:
                errors += [msg_template.format(ix, jx)]
            else:
                errors += []

    if 'output-files' in descriptor:
        # Verify output file with non-optional conditional file template
        # contains a default path
        msg_template = ("OutputError: \"{0}\". Non-optional output-file "
                        "with conditional-path-template must contain "
                        "\"default\" path-template.")
        cond_outfiles_keys = []
        for outF in [
                o for o in descriptor["output-files"]
                if 'conditional-path-template' in o and not o['optional']
        ]:
            out_keys = [
                list(obj.keys())[0]
                for obj in outF['conditional-path-template']
            ]
            cond_outfiles_keys.extend(out_keys)
            if 'default' not in out_keys:
                errors += [msg_template.format(outF['id'])]
            # Verify output keys contain only one default condition
            if out_keys.count('default') > 1:
                errors += [
                    "OutputError: \"{0}\". Only one \"default\" "
                    "condition is permitted in a "
                    "conditional-path-template.".format(outF['id'])
                ]

        # Verify output key contains variables that correspond to input
        # IDs or is 'default'
        msg_template = ("OutputError: \"{0}\" contains non-python keyword "
                        "and non-ID string: \"{1}\"")
        for templateKey in cond_outfiles_keys:
            splitExp = conditionalExpFormat(templateKey).split()
            if splitExp[0] == 'default' and len(splitExp) == 1:
                continue
            for s in [
                    s for s in splitExp
                    if not keyword.iskeyword(s) and s.isalnum()
                    and not s.isdigit()
                    and s not in [i['id'] for i in descriptor['inputs']]
                    and s not in [i['id']
                                  for i in descriptor['output-files']]
            ]:
                errors += [msg_template.format(outF['id'], s)]

        # Verify variable is being evaluated against a value of the
        # same type
        msg_template = ("OutputError: Conditional output \"{0}\" contains "
                        "invalid conditional expression. Verify arguments' "
                        "type and allowed keywords in expression.")
        for templateKey in cond_outfiles_keys:
            splitExp = conditionalExpFormat(templateKey).split()
            if splitExp[0] == 'default' and len(splitExp) == 1:
                continue
            # Replace variable by it's type according to the
            # descriptor schema
            for s in [
                    s for s in enumerate(splitExp)
                    if not keyword.iskeyword(s[1]) and s[1].isalnum()
                    and not s[1].isdigit()
            ]:
                if s[1] in [i['id'] for i in descriptor['inputs']]:
                    splitExp[s[0]] = inById(s[1])['type']
            # Check if the conditional expression is valid
            if not isValidConditionalExp(" ".join(splitExp)):
                errors += [msg_template.format(templateKey)]

    # Verify inputs
    for inp in descriptor["inputs"]:

        # Add optional property in case it's not
        # there (default to false as in JSON)
        if "optional" not in inp.keys():
            inp["optional"] = False

        # Verify flag-type inputs (have flags, not required, cannot
        # be lists)
        if inp["type"] == "Flag":
            msg_template = (" InputError: \"{0}\" must have a"
                            " command-line flag")
            if "command-line-flag" not in inp.keys():
                errors += [msg_template.format(inp["id"])]
            else:
                errors += []

            msg_template = " InputError: \"{0}\" is of type Flag,"\
                " it has to be optional"
            if inp["optional"] is False:
                errors += [msg_template.format(inp["id"])]
            else:
                errors += []

        # Verify number-type inputs min/max are sensible
        elif inp["type"] == "Number":
            msg_template = (" InputError: \"{0}\" cannot have greater"
                            " min ({1}) than max ({2})")
            minn = inp["minimum"] if "minimum" in inp.keys()\
                else -float("Inf")
            maxx = inp["maximum"] if "maximum" in inp.keys()\
                else float("Inf")
            if minn > maxx:
                errors += [msg_template.format(inp["id"], minn, maxx)]
            else:
                errors += []

        # Verify enum-type inputs (at least 1 option, default in set)
        elif "value-choices" in inp.keys():
            msg_template = (" InputError: \"{0}\" must have at least"
                            " one value choice")
            if len(inp["value-choices"]) < 1:
                errors += [msg_template.format(inp["id"])]
            else:
                errors += []

            msg_template = " InputError: \"{0}\" cannot have default"\
                " value outside its choices"
            if "default-value" in inp.keys():
                if not isinstance(inp["default-value"], list):
                    if inp["default-value"] not in inp["value-choices"]:
                        errors += [msg_template.format(inp["id"])]
                else:
                    for dv in inp["default-value"]:
                        if dv not in inp["value-choices"]:
                            errors += [msg_template.format(inp["id"])]
            else:
                errors += []

        # Verify list-type inputs (min entries less than max,
        # no negative entries (both on min and max)
        if "list" in inp.keys():
            msg_template = (" InputError: \"{0}\" cannot have greater min"
                            " entries ({1}) than max entries ({2})")
            minn = inp.get("min-list-entries") or 0
            maxx = inp.get("max-list-entries") or float("Inf")
            if minn > maxx:
                errors += [msg_template.format(inp["id"], minn, maxx)]
            else:
                errors += []

            msg_template = (" InputError: \"{0}\" cannot have negative min"
                            " entries ({1})")
            errors += [msg_template.format(inp["id"], minn)
                       ] if minn < 0 else []

            msg_template = (" InputError: \"{0}\" cannot have non-positive"
                            " max entries ({1})")
            if maxx <= 0:
                errors += [msg_template.format(inp["id"], maxx)]
            else:
                errors += []

        # Verify requires-inputs (present ids, non-overlapping)
        msg_template = " InputError: \"{0}\" required id \"{1}\" not found"
        if "requires-inputs" in inp.keys():
            errors += [
                msg_template.format(inp["id"], ids)
                for ids in inp["requires-inputs"]
                if ids not in inIds + grpIds
            ]

        # Verify disables-inputs (present ids, non-overlapping)
        msg_template = " InputError: \"{0}\" disables id \"{1}\" not found"
        if "disables-inputs" in inp.keys():
            errors += [
                msg_template.format(inp["id"], ids)
                for ids in inp["disables-inputs"] if ids not in inIds
            ]

        if "requires-inputs" in inp.keys() and\
                "disables-inputs" in inp.keys():
            msg_template = (" InputError: \"{0}\" requires and disables"
                            " \"{1}\"")
            errors += [
                msg_template.format(inp["id"], ids1)
                for ids1 in inp["requires-inputs"]
                for ids2 in inp["disables-inputs"] if ids1 == ids2
            ]

        # Verify required inputs cannot require or disable other
        # parameters
        if "requires-inputs" in inp.keys() or\
                "disables-inputs" in inp.keys():
            msg_template = (" InputError: \"{0}\" cannot require or"
                            " disable other inputs")
            if not inp["optional"]:
                errors += [msg_template.format(inp["id"])]

        # Verify value-disables/requires fields accompany value-choices
        if (("value-disables" in inp.keys()
             or "value-requires" in inp.keys())
                and "value-choices" not in inp.keys()):
            msg_template = (" InputError: \"{0}\" cannot have have"
                            " value-opts without value-choices defined.")
            errors += [msg_template.format(inp["id"])]

        if "value-choices" in inp.keys():
            # Verify not value not requiring and disabling input
            if ("value-requires" in inp.keys()
                    and "value-disables" in inp.keys()):
                msg_template = (" InputError: \"{0}\" choice \"{1}\""
                                " requires and disables \"{2}\"")
                errors += [
                    msg_template.format(inp["id"], choice, ids1)
                    for choice in inp["value-choices"]
                    for ids1 in inp["value-disables"][choice]
                    if ids1 in inp["value-requires"][choice]
                ]

            for param in ["value-requires", "value-disables"]:
                if param in inp.keys():
                    # Verify disables/requires keys are the same
                    # as choices
                    msg_template = (" InputError: \"{0}\" {1} list is not"
                                    " the same as the value-choices")
                    if set(inp[param].keys()) != set(inp["value-choices"]):
                        errors += [msg_template.format(inp["id"], param)]

                    # Verify all required or disabled IDs are valid
                    msg_template = (" InputError: \"{0}\" {1} id \"{2}\""
                                    " not found")
                    errors += [
                        msg_template.format(inp["id"], param, ids)
                        for ids in inp[param].values() for item in ids
                        if item not in inIds
                    ]

                    # Verify not requiring or disabling required inputs
                    msg_template = (" InputError: \"{0}\" {1} cannot be"
                                    " used with required input \"{2}\"")
                    errors += [
                        msg_template.format(inp["id"], param, member)
                        for ids in inp[param].keys()
                        for member in inp[param][ids]
                        if not inById(member).get("optional")
                    ]

    # Verify groups
    for idx, grpid in enumerate(grpIds):
        grp = descriptor['groups'][idx]
        # Verify group members must (exist in inputs, show up
        # once, only belong to single group)
        msg_template = " GroupError: \"{0}\" member \"{1}\" does not exist"
        errors += [
            msg_template.format(grp["id"], member)
            for member in grp["members"] if member not in inIds
        ]

        msg_template = " GroupError: \"{0}\" member \"{1}\" appears twice"
        errors += [
            msg_template.format(grp["id"], member)
            for member in set(grp["members"])
            if grp["members"].count(member) > 1
        ]

        # Verify mutually exclusive groups cannot have required members
        # nor requiring members, and that pairs of inputs cannot both be
        # in an all-or-none group
        if grp.get("mutually-exclusive"):
            msg_template = (" GroupError: \"{0}\" is mutually-exclusive"
                            " and cannot have required members, "
                            "such as \"{1}\"")
            errors += [
                msg_template.format(grp["id"], member)
                for member in set(grp["members"])
                if not inById(member)["optional"]
            ]

            msg_template = (" GroupError: \"{0}\" is mutually-exclusive"
                            " and cannot have members require one another,"
                            " such as \"{1}\" and \"{2}\"")
            for member in set(grp["members"]):
                if "requires-inputs" in inById(member).keys():
                    errors += [
                        msg_template.format(grp["id"], member, req)
                        for req in inById(member)["requires-inputs"]
                        if req in set(grp["members"])
                    ]

            for jdx, grp2 in enumerate(descriptor["groups"]):
                if grp2.get("all-or-none"):
                    msg_template = (" GroupError: mutually-exclusive group"
                                    " \"{0}\" and all-or-none group"
                                    " \"{1}\" cannot both contain input"
                                    " pairs \"{2}\" and \"{3}\"")
                    errors += [
                        msg_template.format(grp["id"], grp2["id"], m1, m2)
                        for m1 in grp["members"] for m2 in grp["members"]
                        if m1 != m2 and m1 in grp2["members"]
                        and m2 in grp2["members"] and idx != jdx
                    ]

        # Verify one-is-required groups should never have required
        # members and that the group is not a subset of an all-or-none
        # group
        if grp.get("one-is-required"):
            msg_template = (" GroupError: \"{0}\" is a one-is-required"
                            " group and contains a required member,"
                            " \"{1}\"")
            errors += [
                msg_template.format(grp["id"], member)
                for member in set(grp["members"])
                if member in inIds and not inById(member)["optional"]
            ]
            for jdx, grp2 in enumerate(descriptor["groups"]):
                if grp2.get("all-or-none"):
                    msg_template = (
                        " GroupError: \"{0}\" is one-is-required"
                        " and cannot be a subset of the all-or-none"
                        " group \"{1}\"")
                    if (set(grp["members"]).issubset(set(grp2["members"]))
                            and idx != jdx):
                        errors += [msg_template.format(grp["id"],
                                                       grp2["id"])]

        # Verify all-or-none groups should never have required members
        if grp.get("all-or-none"):
            msg_template = (" GroupError: \"{0}\" is an all-or-none group"
                            " and cannot be paired with one-is-required"
                            " or mutually-exclusive groups")
            if grp.get("one-is-required") or grp.get("mutually-exclusive"):
                errors += [msg_template.format(grp["id"])]

            msg_template = (" GroupError: \"{0}\" is an all-or-none"
                            " group and contains a required member,"
                            " \"{1}\"")
            errors += [
                msg_template.format(grp["id"], member)
                for member in set(grp["members"])
                if member in inIds and not inById(member)["optional"]
            ]

    # Verify tests
    if "tests" in descriptor.keys():
        tests_names = []
        for test in descriptor["tests"]:
            tests_names.append(test["name"])
            if "output-files" in test["assertions"].keys():
                test_output_ids = safeGet(test["assertions"],
                                          "output-files", "id")

                # Verify if output reference ids are valid
                msg_template = ("TestError: \"{0}\" output id"
                                " not found, in test \"{1}\"")
                errors += [
                    msg_template.format(output_id, test["name"])
                    for output_id in test_output_ids
                    if (output_id not in outIds)
                ]

                # Verify that we do not have multiple output
                # references referring to the same id
                msg_template = ("TestError: \"{0}\" output id"
                                " cannot appear more than once within"
                                " same test, in test \"{1}\"")
                errors += [
                    msg_template.format(output_id, test["name"])
                    for output_id in set(test_output_ids)
                    if (test_output_ids.count(output_id) > 1)
                ]

        # Verify that all the defined tests have unique names
        msg_template = "TestError: \"{0}\" test name is non-unique"
        errors += [
            msg_template.format(test_name)
            for test_name in set(tests_names)
            if (tests_names.count(test_name) > 1)
        ]

    errors = None if errors == [] else errors
    if errors is None:
        # Success: optionally rewrite the descriptor with canonical
        # formatting before returning it.
        if kwargs.get('format_output'):
            with open(json_file, 'w') as fhandle:
                fhandle.write(json.dumps(descriptor, indent=4,
                                         sort_keys=True))
        return descriptor
    else:
        raise DescriptorValidationError("\n".join(errors))
def validate_descriptor(json_file, **kwargs):
    """
    Validate a Boutiques descriptor against the JSON schema and the
    Boutiques-specific consistency rules.

    Parameters
    ----------
    json_file : str
        Descriptor to validate; passed to ``loadJson``, so it may be a
        file path, JSON string, or Zenodo ID. NOTE(review): when
        ``format_output`` is used it must be a writable file path,
        since the descriptor is written back to it.
    **kwargs
        format_output : bool, optional
            If truthy and validation succeeds, rewrite ``json_file``
            with pretty-printed, key-sorted JSON.

    Returns
    -------
    dict
        The parsed descriptor, when every check passes.

    Raises
    ------
    DescriptorValidationError
        If the descriptor fails basic schema validation or any of the
        Boutiques-specific checks; all specific errors are joined into
        the exception message, one per line.
    """
    # Locate the bundled schema next to the boutiques package
    # (bfile is the package's __file__, imported at module level).
    path, fil = op.split(bfile)
    schema_file = op.join(path, "schema", "descriptor.schema.json")

    # Load schema
    with open(schema_file) as fhandle:
        schema = json.load(fhandle)

    # Load descriptor
    descriptor = loadJson(json_file)

    # Validate basic JSON schema compliance for descriptor
    # Note: if it fails basic schema compliance we don't do more checks
    try:
        validate(descriptor, schema)
    except ValidationError as e:
        raise_error(DescriptorValidationError, (str(e)))

    # Helper get functions
    def safeGet(desc, sec, targ):
        # Collect desc[sec][*][targ] for entries that define targ;
        # empty list when the section is missing or empty.
        if desc.get(sec):
            return [item.get(targ) for item in desc[sec]
                    if list(item.keys()).count(targ)]
        return []

    def inputGet(s):
        return safeGet(descriptor, "inputs", s)

    def outputGet(s):
        return safeGet(descriptor, "output-files", s)

    def groupGet(s):
        return safeGet(descriptor, "groups", s)

    def inById(i):
        # Input dict for id i, or {} if no such input exists.
        if i in inputGet("id"):
            return descriptor["inputs"][inputGet("id").index(i)]
        return {}

    # Begin looking at Boutiques-specific failures
    errors = []

    clkeys = inputGet("value-key") + outputGet("value-key")
    flattenedTemplates = [y for x in outputGet("file-template") for y in x]
    configFileTemplates = flattenedTemplates + outputGet("path-template")

    cmdline = descriptor["command-line"]

    # Verify that all command-line key appear in the command-line, in
    # a file template or in an environment variable value
    msg_template = (" KeyError: \"{0}\" not in command-line or file template"
                    " or environment variables")
    envValues = ""
    if descriptor.get('environment-variables'):
        for env in descriptor.get('environment-variables'):
            envValues += "||" + env['value']
    errors += [msg_template.format(k) for k in clkeys
               if ((cmdline + ".".join(configFileTemplates) +
                    envValues).count(k)) < 1]

    # Verify that no key contains another key
    msg_template = " KeyError: \"{0}\" contains \"{1}\""
    errors += [msg_template.format(key, clkeys[jdx])
               for idx, key in enumerate(clkeys)
               for jdx in range(0, len(clkeys))
               if clkeys[jdx] in key and key != clkeys[jdx]]

    # Verify that all Ids are unique
    inIds, outIds = inputGet("id"), outputGet("id")
    grpIds = groupGet("id") if "groups" in descriptor.keys() else []
    allIds = inIds + outIds + grpIds
    msg_template = " IdError: \"{0}\" is non-unique"
    for idx, s1 in enumerate(allIds):
        for jdx, s2 in enumerate(allIds):
            # idx < jdx so each duplicate pair is reported once
            if s1 == s2 and idx < jdx:
                errors += [msg_template.format(s1)]
            else:
                errors += []

    # Verify that identical keys only exist if they are both in mutex groups
    msg_template = " MutExError: \"{0}\" belongs to 2+ non exclusive IDs"
    for idx, key in enumerate(clkeys):
        for jdx in range(idx + 1, len(clkeys)):
            if clkeys[jdx] == key:
                mids = [inById(mid)["id"] for mid in inIds
                        if inById(mid)["value-key"] == key]
                # FIX: "or []" avoids TypeError when duplicate keys
                # exist but the descriptor has no "groups" section.
                # FIX: loop variable renamed from idx to gdx so it no
                # longer shadows the enclosing clkeys index.
                for gdx, grp in enumerate(descriptor.get("groups") or []):
                    mutex = grp.get("mutually-exclusive")
                    if set(grp["members"]) == set(mids) and not mutex:
                        errors += [msg_template.format(key)]

    # Verify that output files have unique path-templates
    msg_template = ("OutputError: \"{0}\" and \"{1}\" have the same "
                    "path-template")
    for ix, o1 in zip(outputGet("id"), outputGet("path-template")):
        for jx, o2 in zip(outputGet("id"), outputGet("path-template")):
            if o1 == o2 and jx != ix:
                errors += [msg_template.format(ix, jx)]
            else:
                errors += []

    # Verify inputs
    for inp in descriptor["inputs"]:

        # Add optional property in case it's not
        # there (default to false as in JSON)
        if "optional" not in inp.keys():
            inp["optional"] = False

        # Verify flag-type inputs (have flags, not required, cannot be lists)
        if inp["type"] == "Flag":
            msg_template = " InputError: \"{0}\" must have a command-line flag"
            if "command-line-flag" not in inp.keys():
                errors += [msg_template.format(inp["id"])]
            else:
                errors += []

            msg_template = " InputError: \"{0}\" is of type Flag," \
                           " it has to be optional"
            if inp["optional"] is False:
                errors += [msg_template.format(inp["id"])]
            else:
                errors += []

        # Verify number-type inputs min/max are sensible
        elif inp["type"] == "Number":
            msg_template = (" InputError: \"{0}\" cannot have greater"
                            " min ({1}) than max ({2})")
            minn = inp["minimum"] if "minimum" in inp.keys() else -float("Inf")
            maxx = inp["maximum"] if "maximum" in inp.keys() else float("Inf")
            if minn > maxx:
                errors += [msg_template.format(inp["id"], minn, maxx)]
            else:
                errors += []

        # Verify enum-type inputs (at least 1 option, default in set)
        elif "value-choices" in inp.keys():
            msg_template = (" InputError: \"{0}\" must have at least"
                            " one value choice")
            if len(inp["value-choices"]) < 1:
                errors += [msg_template.format(inp["id"])]
            else:
                errors += []

            msg_template = " InputError: \"{0}\" cannot have default" \
                           " value outside its choices"
            if "default-value" in inp.keys():
                if not isinstance(inp["default-value"], list):
                    if inp["default-value"] not in inp["value-choices"]:
                        errors += [msg_template.format(inp["id"])]
                else:
                    # List-typed inputs: check every default entry
                    for dv in inp["default-value"]:
                        if dv not in inp["value-choices"]:
                            errors += [msg_template.format(inp["id"])]
            else:
                errors += []

        # Verify list-type inputs (min entries less than max,
        # no negative entries (both on min and max)
        if "list" in inp.keys():
            msg_template = (" InputError: \"{0}\" cannot have greater min"
                            " entries ({1}) than max entries ({2})")
            minn = inp.get("min-list-entries") or 0
            maxx = inp.get("max-list-entries") or float("Inf")
            if minn > maxx:
                errors += [msg_template.format(inp["id"], minn, maxx)]
            else:
                errors += []

            msg_template = (" InputError: \"{0}\" cannot have negative min"
                            " entries ({1})")
            errors += [msg_template.format(inp["id"], minn)] if minn < 0 else []

            msg_template = (" InputError: \"{0}\" cannot have non-positive"
                            " max entries ({1})")
            if maxx <= 0:
                errors += [msg_template.format(inp["id"], maxx)]
            else:
                errors += []

        # Verify requires-inputs (present ids, non-overlapping)
        msg_template = " InputError: \"{0}\" required id \"{1}\" not found"
        if "requires-inputs" in inp.keys():
            errors += [msg_template.format(inp["id"], ids)
                       for ids in inp["requires-inputs"]
                       if ids not in inIds + grpIds]

        # Verify disables-inputs (present ids, non-overlapping)
        msg_template = " InputError: \"{0}\" disables id \"{1}\" not found"
        if "disables-inputs" in inp.keys():
            errors += [msg_template.format(inp["id"], ids)
                       for ids in inp["disables-inputs"]
                       if ids not in inIds]

        if "requires-inputs" in inp.keys() and "disables-inputs" in inp.keys():
            msg_template = " InputError: \"{0}\" requires and disables \"{1}\""
            errors += [msg_template.format(inp["id"], ids1)
                       for ids1 in inp["requires-inputs"]
                       for ids2 in inp["disables-inputs"]
                       if ids1 == ids2]

        # Verify required inputs cannot require or disable other parameters
        if "requires-inputs" in inp.keys() or "disables-inputs" in inp.keys():
            msg_template = (" InputError: \"{0}\" cannot require or"
                            " disable other inputs")
            if not inp["optional"]:
                errors += [msg_template.format(inp["id"])]

        # Verify value-disables/requires fields accompany value-choices
        if (("value-disables" in inp.keys() or
             "value-requires" in inp.keys()) and
                "value-choices" not in inp.keys()):
            msg_template = (" InputError: \"{0}\" cannot have have value-opts"
                            " without value-choices defined.")
            errors += [msg_template.format(inp["id"])]

        if "value-choices" in inp.keys():
            # Verify not value not requiring and disabling input
            if ("value-requires" in inp.keys() and
                    "value-disables" in inp.keys()):
                msg_template = (" InputError: \"{0}\" choice \"{1}\" requires"
                                " and disables \"{2}\"")
                errors += [msg_template.format(inp["id"], choice, ids1)
                           for choice in inp["value-choices"]
                           for ids1 in inp["value-disables"][choice]
                           if ids1 in inp["value-requires"][choice]]

            for param in ["value-requires", "value-disables"]:
                if param in inp.keys():
                    # Verify disables/requires keys are the same as choices
                    msg_template = (" InputError: \"{0}\" {1} list is not the"
                                    " same as the value-choices")
                    if set(inp[param].keys()) != set(inp["value-choices"]):
                        errors += [msg_template.format(inp["id"], param)]

                    # Verify all required or disabled IDs are valid
                    msg_template = (" InputError: \"{0}\" {1} id \"{2}\" not"
                                    " found")
                    # FIX: format the offending id (item), not the
                    # whole id list (ids), to match the message.
                    errors += [msg_template.format(inp["id"], param, item)
                               for ids in inp[param].values()
                               for item in ids
                               if item not in inIds]

                    # Verify not requiring or disabling required inputs
                    msg_template = (" InputError: \"{0}\" {1} cannot be used "
                                    "with required input \"{2}\"")
                    errors += [msg_template.format(inp["id"], param, member)
                               for ids in inp[param].keys()
                               for member in inp[param][ids]
                               if not inById(member).get("optional")]

    # Verify groups
    for idx, grpid in enumerate(grpIds):
        grp = descriptor['groups'][idx]
        # Verify group members must (exist in inputs, show up
        # once, only belong to single group)
        msg_template = " GroupError: \"{0}\" member \"{1}\" does not exist"
        errors += [msg_template.format(grp["id"], member)
                   for member in grp["members"]
                   if member not in inIds]

        msg_template = " GroupError: \"{0}\" member \"{1}\" appears twice"
        errors += [msg_template.format(grp["id"], member)
                   for member in set(grp["members"])
                   if grp["members"].count(member) > 1]

        # Verify mutually exclusive groups cannot have required members
        # nor requiring members, and that pairs of inputs cannot both be
        # in an all-or-none group
        if grp.get("mutually-exclusive"):
            msg_template = (" GroupError: \"{0}\" is mutually-exclusive"
                            " and cannot have required members, "
                            "such as \"{1}\"")
            errors += [msg_template.format(grp["id"], member)
                       for member in set(grp["members"])
                       if not inById(member)["optional"]]

            msg_template = (" GroupError: \"{0}\" is mutually-exclusive"
                            " and cannot have members require one another,"
                            " such as \"{1}\" and \"{2}\"")
            for member in set(grp["members"]):
                if "requires-inputs" in inById(member).keys():
                    errors += [msg_template.format(grp["id"], member, req)
                               for req in inById(member)["requires-inputs"]
                               if req in set(grp["members"])]

            for jdx, grp2 in enumerate(descriptor["groups"]):
                if grp2.get("all-or-none"):
                    msg_template = (" GroupError: mutually-exclusive group"
                                    " \"{0}\" and all-or-none group \"{1}\""
                                    " cannot both contain input pairs \"{2}\""
                                    " and \"{3}\"")
                    errors += [msg_template.format(grp["id"], grp2["id"],
                                                   m1, m2)
                               for m1 in grp["members"]
                               for m2 in grp["members"]
                               if m1 != m2 and m1 in grp2["members"]
                               and m2 in grp2["members"] and idx != jdx]

        # Verify one-is-required groups should never have required members
        # and that the group is not a subset of an all-or-none group
        if grp.get("one-is-required"):
            msg_template = (" GroupError: \"{0}\" is a one-is-required"
                            " group and contains a required member, \"{1}\"")
            errors += [msg_template.format(grp["id"], member)
                       for member in set(grp["members"])
                       if member in inIds and not inById(member)["optional"]]

            for jdx, grp2 in enumerate(descriptor["groups"]):
                if grp2.get("all-or-none"):
                    msg_template = (
                        " GroupError: \"{0}\" is one-is-required"
                        " and cannot be a subset of the all-or-none"
                        " group \"{1}\"")
                    if (set(grp["members"]).issubset(set(grp2["members"]))
                            and idx != jdx):
                        errors += [msg_template.format(grp["id"],
                                                       grp2["id"])]

        # Verify all-or-none groups should never have required members
        if grp.get("all-or-none"):
            msg_template = (" GroupError: \"{0}\" is an all-or-none group"
                            " and cannot be paired with one-is-required"
                            " or mutually-exclusive groups")
            if grp.get("one-is-required") or grp.get("mutually-exclusive"):
                errors += [msg_template.format(grp["id"])]

            msg_template = (" GroupError: \"{0}\" is an all-or-none"
                            " group and contains a required member, \"{1}\"")
            errors += [msg_template.format(grp["id"], member)
                       for member in set(grp["members"])
                       if member in inIds and not inById(member)["optional"]]

    # Verify tests
    if "tests" in descriptor.keys():
        tests_names = []
        for test in descriptor["tests"]:
            tests_names.append(test["name"])
            if "output-files" in test["assertions"].keys():
                test_output_ids = safeGet(test["assertions"],
                                          "output-files", "id")

                # Verify if output reference ids are valid
                msg_template = ("TestError: \"{0}\" output id"
                                " not found, in test \"{1}\"")
                errors += [msg_template.format(output_id, test["name"])
                           for output_id in test_output_ids
                           if (output_id not in outIds)]

                # Verify that we do not have multiple output
                # references referring to the same id
                msg_template = ("TestError: \"{0}\" output id"
                                " cannot appear more than once within"
                                " same test, in test \"{1}\"")
                errors += [msg_template.format(output_id, test["name"])
                           for output_id in set(test_output_ids)
                           if (test_output_ids.count(output_id) > 1)]

        # Verify that all the defined tests have unique names
        msg_template = "TestError: \"{0}\" test name is non-unique"
        errors += [msg_template.format(test_name)
                   for test_name in set(tests_names)
                   if (tests_names.count(test_name) > 1)]

    errors = None if errors == [] else errors
    if errors is None:
        if kwargs.get('format_output'):
            with open(json_file, 'w') as fhandle:
                fhandle.write(json.dumps(descriptor, indent=4,
                                         sort_keys=True))
        return descriptor
    else:
        raise DescriptorValidationError("\n".join(errors))
def deprecate(zenodo_id, by_zenodo_id=None, sandbox=False, verbose=False,
              zenodo_token=None, download_function=urlretrieve):
    """
    Mark a published Boutiques tool as deprecated on Zenodo.

    Pulls the descriptor for ``zenodo_id``, sets its
    ``deprecated-by-doi`` property (to the DOI of ``by_zenodo_id`` when
    given, otherwise ``True``), and republishes the updated descriptor
    as a new version of the same Zenodo record.

    Returns the result of ``Publisher.publish()``, or ``None`` when the
    operation is aborted (already deprecated, or user declines the
    confirmation prompt).
    """
    # Fetch the descriptor for the tool being deprecated.
    fetched_path = Puller([zenodo_id], verbose=verbose,
                          sandbox=sandbox).pull()[0]
    desc = loadJson(fetched_path, sandbox=sandbox, verbose=verbose)

    # Bail out (or confirm) when the tool is already deprecated.
    prior = desc.get('deprecated-by-doi')
    if prior is not None:
        if isinstance(prior, str):
            print_info('Tool {0} is already deprecated by {1} '
                       .format(zenodo_id, prior))
        if by_zenodo_id is None:
            print_warning('Tool {0} is already deprecated'.format(zenodo_id))
            return
        prompt = ("Tool {0} will be deprecated by {1}, "
                  "this cannot be undone. Are you sure? (Y/n) ")\
            .format(zenodo_id, by_zenodo_id)
        if input(prompt).upper() != "Y":
            return

    # Resolve the Zenodo id and record id of the pulled descriptor.
    helper = ZenodoHelper(sandbox=sandbox, no_int=True, verbose=verbose)
    zid = helper.get_zid_from_filename(fetched_path)
    record_id = helper.get_record_id_from_zid(zid)

    # Refuse to deprecate anything but the latest version.
    record = helper.zenodo_get_record(record_id)
    version_info = record['metadata']['relations']['version'][0]
    if not version_info['is_last']:
        newer = version_info['last_child']['pid_value']
        raise_error(DeprecateError, 'Tool {0} has a newer version '
                    '(zenodo.{1}), it cannot be deprecated.'
                    .format(zenodo_id, newer))
        return  # defensive: raise_error is expected to raise

    # Record the deprecation: either a DOI reference or a bare flag.
    if by_zenodo_id is None:
        desc['deprecated-by-doi'] = True
    else:
        # The replacement tool must actually exist on Zenodo.
        target_record = helper.get_record_id_from_zid(by_zenodo_id)
        if helper.record_exists(target_record) is False:
            raise_error(DeprecateError,
                        "Tool does not exist: {0}".format(by_zenodo_id))
        desc['deprecated-by-doi'] = helper.get_doi_from_zid(by_zenodo_id)

    # The update path requires the descriptor to carry its own DOI.
    if desc.get('doi') is None:
        desc['doi'] = helper.get_doi_from_zid(zid)

    # Write the amended descriptor to a temp file the Publisher can read
    # (delete=False so the file survives close()).
    tmp = tempfile.NamedTemporaryFile(delete=False, mode='w',
                                      suffix=".json")
    tmp.write(json.dumps(desc, indent=4, sort_keys=True))
    tmp.close()

    # Publish the new version over the existing record.
    return Publisher(tmp.name, zenodo_token, replace=True,
                     sandbox=sandbox, no_int=True,
                     id="zenodo." + zid, verbose=verbose).publish()