def generateCSV(self, schemas):
    """Write a CSV summary of every property found in the given schemas.

    One row per property is written to
    ``../docs/jsonBrowser/json_fields.csv`` under the header
    ``Property,UserFriendlyName,Description,Schema,Required?``.

    Args:
        schemas: iterable of paths to JSON schema files; each one is loaded
            with ``get_json_from_file``.
    """

    def quoted_or_na(prop_schema, key):
        # Present values are always wrapped in double quotes. Embedded
        # quotes are doubled so they cannot terminate the field early;
        # absent values are written as the bare marker N/A.
        if key not in prop_schema:
            return "N/A"
        return "\"" + prop_schema[key].replace("\"", "\"\"") + "\""

    # The context manager closes the file even when a schema fails to load
    # or lacks an expected key part-way through the run.
    with open("../docs/jsonBrowser/json_fields.csv", "w") as csv_file:
        csv_file.write(
            "Property,UserFriendlyName,Description,Schema,Required?\n")

        for path in schemas:
            schema = get_json_from_file(path)
            required = schema["required"] if "required" in schema else []

            for name, prop_schema in schema["properties"].items():
                row = [
                    name,
                    quoted_or_na(prop_schema, "user_friendly"),
                    quoted_or_na(prop_schema, "description"),
                    schema["title"],
                    "1" if name in required else "0",
                ]
                csv_file.write(",".join(row) + "\n")
# Flag for tracking the exit status of validate() calls
status_flag = True

os.chdir('../json_schema')
# Query the working directory in-process rather than spawning an external
# `pwd` command, so no such executable has to exist on the host.
pwd = os.getcwd()
print('pwd: %s' % pwd)
base_uri = "file://" + pwd + "/"
print('base URI: %s' % base_uri)

# Specific JSON file example tests

# Testing valid project JSON example
print('\nValidating type/project/project.json schema')
sv = get_validator('type/project/project.json', base_uri)
print('Validating project/test_pass_project_0.json JSON against schema')
p1 = get_json_from_file(
    '../schema_test_files/project/test_pass_project_0.json')
if not validate(sv, p1):
    status_flag = False

# Testing invalid project JSON example
# It is missing required project_shortname field
print('\nValidating type/project/project.json schema')
sv = get_validator('type/project/project.json', base_uri)
print(
    'Validating project/test_fail_project_0.json JSON against schema\n(This should fail, missing project_shortname)'
)
p1 = get_json_from_file(
    '../schema_test_files/project/test_fail_project_0.json')
# This document is expected to be rejected, so a successful validation is
# what counts as the failure here.
if validate(sv, p1):
    status_flag = False
def generateMarkdown(self, schemas, entity_type):
    """Write the markdown property tables for one entity type.

    A file ``../docs/jsonBrowser/<entity_type>.md`` is created containing one
    section per schema with a table of its properties. For each schema a
    matching section listing only the required properties is appended to
    ``self.required_file``. Properties named in the module-level
    ``boiler_plate`` collection are left out of both tables.

    Args:
        schemas: iterable of schema file paths, loaded with
            ``get_json_from_file``.
        entity_type: name used for the output file and the top heading;
            ``"module"`` and ``"core"`` sections additionally get an HTML
            anchor so other pages can link to them.
    """

    def reference_link(prop_schema):
        # Build the "Object reference?" cell: a markdown link to the core or
        # module page for a direct `$ref` or an `items.$ref`. References
        # into `definitions`, and properties without a reference, get "".
        if "$ref" in prop_schema:
            ref = prop_schema["$ref"]
        elif "items" in prop_schema and "$ref" in prop_schema["items"]:
            ref = prop_schema["items"]["$ref"]
        else:
            return ""
        if "definitions" in ref:
            return ""
        if "core" in ref:
            target = "core"
        elif "module" in ref:
            target = "module"
        else:
            target = ""
        mod = ref.split("/")[-1].replace(".json", "")
        return "[See " + target + " " + mod + "](" + target + ".md/#" + mod + ")"

    def cell(prop_schema, key):
        # Table cell for an optional attribute; empty when it is absent.
        return prop_schema[key] if key in prop_schema else ""

    # The context manager guarantees the markdown file is closed even if a
    # schema cannot be loaded or is missing an expected key.
    with open("../docs/jsonBrowser/" + entity_type + ".md", "w") as md_file:
        md_file.write("# " + entity_type.capitalize() + "\n")
        self.required_file.write("## " + entity_type.capitalize() + "\n")
        self._addBoilerPlate(md_file)

        for path in schemas:
            schema = get_json_from_file(path)
            title = schema["title"]

            if entity_type == "module" or entity_type == "core":
                anchor = "<a name='" + title + "'></a>"
                md_file.write("## " + title + anchor + "\n")
                self.required_file.write("### " + title + anchor + "\n")
            else:
                md_file.write("## " + title + "\n")
                self.required_file.write("### " + title + "\n")

            md_file.write("_" + schema["description"] + "_\n")
            md_file.write("\n")
            md_file.write(
                "Location: " + path.replace("../json_schema/", "") + "\n")
            md_file.write("\n")
            md_file.write(
                "Property name | Description | Type | Required? | Object reference? | User friendly name | Allowed values | Example \n"
            )
            md_file.write("--- | --- | --- | --- | --- | --- | --- | --- \n")

            required = []
            if "required" in schema:
                required = schema["required"]
                self.required_file.write(
                    "Property name | Description | Type | Object reference? | User friendly name | Allowed values | Example \n"
                )
                self.required_file.write(
                    "--- | --- | --- | --- | --- | --- | --- \n")
            else:
                self.required_file.write(
                    "_There are no required properties in schema " + title +
                    "_\n")

            for name, prop_schema in schema["properties"].items():
                if name in boiler_plate:
                    continue

                link = reference_link(prop_schema)
                description = cell(prop_schema, "description")
                prop_type = cell(prop_schema, "type")
                friendly = cell(prop_schema, "user_friendly")
                # Enum members may be numbers or booleans; convert each one
                # so the join cannot raise TypeError.
                allowed = (", ".join(str(v) for v in prop_schema["enum"])
                           if "enum" in prop_schema else "")
                example = (str(prop_schema["example"])
                           if "example" in prop_schema else "")

                md_file.write(" | ".join([
                    name,
                    description,
                    prop_type,
                    "yes" if name in required else "no",
                    link,
                    friendly,
                    allowed,
                    example,
                ]) + "\n")

                # The required-properties table has the same columns minus
                # the "Required?" one.
                if name in required:
                    self.required_file.write(" | ".join([
                        name,
                        description,
                        prop_type,
                        link,
                        friendly,
                        allowed,
                        example,
                    ]) + "\n")

            md_file.write("\n")
def _gatherValues(self, basepath, schema, dependencies, local, userFriendly):
    """Collect spreadsheet column definitions for a schema and its references.

    The schema is read from disk (``local``) or fetched over HTTP, then each
    property is turned into a ``{"header", "description", "example"}`` entry.
    Referenced schemas are gathered recursively: array references listed in
    ``dependencies`` become separate entries (their own tabs), while single
    references to ``_core`` schemas or listed dependencies are flattened into
    this schema's columns with a prefix.

    Args:
        basepath: base directory or base URL prepended to schema references.
        schema: path (local) or URL/partial URL (remote) of the schema.
        dependencies: collection of schema references that should be
            followed; ``None`` is treated as "follow none".
        local: when true, load with ``get_json_from_file`` instead of HTTP.
        userFriendly: when true, use ``user_friendly`` names as headers;
            otherwise use the raw property names.

    Returns:
        dict mapping entity title to its list of column entries. An empty
        dict is returned when a remote schema cannot be retrieved.
    """
    # Recursive calls pass None for dependencies; normalise it so the
    # membership tests below cannot raise TypeError on nested references.
    if dependencies is None:
        dependencies = ()

    def resolve_local(ref):
        # Rebuild a local file path from a reference: drop its first three
        # "/"-separated segments and its second-to-last segment, then
        # re-root the remainder under basepath with a .json extension.
        parts = ref.split("/")
        del parts[0:3]
        del parts[-2]
        return basepath + "".join("/" + part for part in parts) + ".json"

    def describe(prop_schema):
        # Optional description/example of a property, None when absent.
        return prop_schema.get("description"), prop_schema.get("example")

    if local:
        jsonRaw = get_json_from_file(schema)
    else:
        if basepath not in schema:
            schema = basepath + schema
        # get the schema over HTTP
        req = requests.get(schema)
        if req.status_code != requests.codes.ok:
            # Nothing can be gathered without the schema; report it and hand
            # back an empty result rather than failing on an unbound name.
            self.logger.error(schema + " does not exist")
            return {}
        jsonRaw = req.json()

    entities = {}
    entity_title = jsonRaw["title"]
    properties = jsonRaw["properties"]
    values = []

    for prop in properties:
        prop_schema = properties[prop]

        # if a property has an array of references (potential 1-to-many
        # relationship), gather the properties for the references and format
        # them to become their own spreadsheet tab
        if ("items" in prop_schema and "$ref" in prop_schema["items"]
                and "ontology" not in prop_schema["items"]["$ref"]):
            module = prop_schema["items"]["$ref"]
            if local:
                module = resolve_local(module)

            if module in dependencies:
                module_values = self._gatherValues(basepath, module, None,
                                                   local, userFriendly)

                # add primary entity ID to cross reference with main entity;
                # only the first ID/shortname column gathered so far is used
                for primary in values:
                    header = primary["header"]
                    if "id" in header.lower() or "shortname" in header:
                        for key in module_values.keys():
                            if "ID" in header:
                                owner = header.replace(" ID", "").lower()
                                d = ("ID for " + owner + " this " + key +
                                     " relates to")
                            else:
                                owner = header.replace(" shortname",
                                                       "").lower()
                                d = ("Shortname for " + owner + " this " +
                                     key + " relates to")
                            module_values[key].append({
                                "header": header,
                                "description": d,
                                "example": None
                            })
                        break

                # special name cases for publication tabs
                if entity_title == "project" and "publication" in module_values:
                    module_values["project.publications"] = module_values.pop(
                        "publication")
                if entity_title == "cell_line" and "publication" in module_values:
                    module_values["cell_line.publications"] = module_values.pop(
                        "publication")

                entities.update(module_values)

        # if a property includes a single (non-ontology) reference, fetch the
        # contents of that reference and add them directly to the properties
        # for this sheet
        elif "$ref" in prop_schema and "ontology" not in prop_schema["$ref"]:
            module = prop_schema["$ref"]
            if local:
                module = resolve_local(module)

            if "_core" in module or module in dependencies:
                module_values = self._gatherValues(basepath, module, None,
                                                   local, userFriendly)
                prefix = ""
                if userFriendly:
                    if "user_friendly" in prop_schema:
                        prefix = prop_schema["user_friendly"] + " - "
                    else:
                        print(prop + " in " + entity_title +
                              " has no user friendly name")
                else:
                    prefix = prop + "."

                for key in module_values.keys():
                    for entry in module_values[key]:
                        entry["header"] = prefix + entry["header"]
                    values.extend(module_values[key])

        # if a property has a user_friendly tag, include it as a direct
        # field. This includes ontology module references as these should
        # not be exposed to users
        elif userFriendly and "user_friendly" in prop_schema:
            description, example = describe(prop_schema)
            values.append({
                "header": prop_schema["user_friendly"],
                "description": description,
                "example": example
            })

        elif not userFriendly:
            if prop not in excluded_fields:
                description, example = describe(prop_schema)
                header = prop
                # Ontology references are exposed through their text field.
                is_ontology_ref = ("$ref" in prop_schema
                                   and "ontology" in prop_schema["$ref"])
                is_ontology_list = (
                    "items" in prop_schema
                    and "$ref" in prop_schema["items"]
                    and "ontology" in prop_schema["items"]["$ref"])
                if is_ontology_ref or is_ontology_list:
                    header = prop + ".text"
                values.append({
                    "header": header,
                    "description": description,
                    "example": example
                })

    # Linking columns appended according to which kind of entity the schema
    # path identifies.
    if "type/biomaterial" in schema:
        values.append({
            "header": "Process IDs" if userFriendly else "process_ids",
            "description":
            "IDs of processes for which this biomaterial is an input",
            "example": None
        })

    if "type/process" in schema:
        values.append({
            "header": "Protocol IDs" if userFriendly else "protocol_ids",
            "description": "IDs of protocols which this process implements",
            "example": None
        })

    if "type/file" in schema:
        values.append({
            "header": "Biomaterial ID" if userFriendly else "biomaterial_id",
            "description": "ID of the biomaterial to which this file relates",
            "example": None
        })
        values.append({
            "header":
            "Sequencing process ID" if userFriendly else "process_id",
            "description":
            "ID of the sequencing process to which this file relates",
            "example": None
        })

    entities[entity_title] = values
    return entities
# Flag for tracking the exit status of validate() calls
status_flag = True

os.chdir('../json_schema')
# Query the working directory in-process rather than spawning an external
# `pwd` command, so no such executable has to exist on the host.
pwd = os.getcwd()
base_uri = "file://" + pwd + "/"
print(base_uri)

print('\nValidating sample.json')
sv = get_validator('sample.json', base_uri)

# Specific schema tests follow

print('\nValidating schema_test_files/10x_pbmc8k_donor_0.json')
dt1 = get_json_from_file('../schema_test_files/10x_pbmc8k_donor_0.json')
if not validate(sv, dt1):  # will return False if fails (show return value)
    status_flag = False

print('\nValidating schema_test_files/10x_pbmc8k_sample_0.json')
sfo1 = get_json_from_file('../schema_test_files/10x_pbmc8k_sample_0.json')
if not validate(sv, sfo1):
    status_flag = False

print('\nValidating schema_tests/sample/fail/sample-test-current.json\n(This should fail)')
sf1 = get_json_from_file('../schema_tests/sample/fail/sample-test-current.json')
# This should fail. If it fails, keep status_flag = True
if validate(sv, sf1):
    status_flag = False

# Specific bundle tests follow
def lintSchema(self, path):
    """Lint one JSON schema file and print every convention violation found.

    The schema is loaded with ``get_json_from_file`` and checked at two
    levels: root-level keywords (allowed/required fields, ``$schema``,
    ``name``, ``type``, ``required`` entries) and each entry of
    ``properties`` (naming, description, user-friendly name, format, type,
    examples, unit pairing and allowed keywords). Findings are only printed;
    nothing is returned.

    Args:
        path: path to the schema file; the file name without its extension
            is used as the schema name in the messages.
    """
    schema = get_json_from_file(path)
    # A schema without `properties` is linted against an empty mapping so
    # the root-level checks still run instead of aborting with KeyError.
    properties = schema.get('properties', {})

    # SCHEMA-LEVEL CHECKS

    schema_filename = path.split("/")[-1].split(".")[0]
    label = schema_filename + ".json: "

    # Check that all root level fields in the schema are part of the list of
    # allowed root level fields
    for key in schema.keys():
        if key not in allowed_root_level_keywords:
            print("Root level field `" + key + "` in schema " + path +
                  " not part of allowed root level properties")

    # Check that all required root level fields are present in the schema
    for prop in required_root_level_keywords:
        if prop not in schema.keys():
            print(label + "Missing required root level field `" + prop + "`")

    # Check that additionalProperties is set to false
    if "additionalProperties" in schema and schema[
            'additionalProperties'] == True:
        print(label + "Should not allow additional properties")

    # Check that $schema is set to draft-07
    if "$schema" in schema and schema[
            '$schema'] != "http://json-schema.org/draft-07/schema#":
        print(
            label +
            "Must have $schema set to http://json-schema.org/draft-07/schema#"
        )

    # Check that the name of the schema in the describedBy URL is set to the
    # schema filename. A missing `describedBy` is not reported here
    # (presumably the essential-properties check below covers it - confirm
    # against `essential_properties`).
    if 'describedBy' in properties:
        if 'pattern' not in properties['describedBy']:
            print(label + "Property `describedBy` is missing the `pattern` attribute")
        else:
            described_name = properties['describedBy']['pattern'].split(
                "/")[-1]
            if described_name != schema_filename:
                print(label + "End of `describedBy` URL (" + described_name +
                      ") must match schema filename (" + schema_filename +
                      ")")

    # Check that the schema name attribute is set to the schema filename
    if "name" in schema and schema['name'] != schema_filename:
        print(label + "The `name` attribute (" + schema['name'] +
              ") must match the schema filename (" + schema_filename + ")")

    # Check that schema type is set to object
    if "type" in schema and schema['type'] != "object":
        print(label + "The `type` attribute must be set to object")

    # Check that all required fields are actually in the schema
    if "required" in schema:
        for req_prop in schema["required"]:
            if req_prop not in properties:
                print("Property `" + req_prop + "` is required in " +
                      schema_filename + ".json but is undefined")

    # PROPERTY-LEVEL CHECKS

    # Check that essential properties `describedBy` and `schema_version` are
    # present
    for ep in essential_properties:
        if ep not in properties:
            print(label + "Missing required property `" + ep + "`")

    unlinted_schemas = ['links', 'provenance']

    for name in properties:
        attrs = properties[name]

        # Check that property name contains only lowercase letters and
        # underscore
        if not re.match("^[a-z_]+$", name) and name not in ['describedBy']:
            print(label + "Property `" + name +
                  "` contains non-lowercase/underscore characters")

        # Check that property contains description attribute
        if 'description' not in attrs:
            print(label + "Keyword `description` missing from property `" +
                  name + "`")
        # Check that description attribute is a sentence - start with
        # capital letter and end with full stop
        elif not re.match('^[A-Z][^?!]*[.]$', attrs['description']):
            print(label + "The `description` for property `" + name +
                  "` is not a sentence (" + attrs['description'] + ")")

        # Check that property contains user-friendly attribute
        # Currently excludes ingest-supplied fields
        # Currently excludes links.json and provenance.json
        if name not in [
                'provenance', 'schema_version', 'schema_type', 'describedBy'
        ] and 'user_friendly' not in attrs:
            if schema_filename not in ["links", "provenance"]:
                print(label +
                      "Keyword `user_friendly` missing from property `" +
                      name + "`")

        # Check that if property contains format attribute, format is valid
        # JSON format
        if 'format' in attrs and attrs['format'] not in [
                "date", "date-time", "email"
        ]:
            # str() keeps the report working for non-string format values.
            print(label + "Format `" + str(attrs['format']) +
                  "` is not a valid JSON format)")

        # Check that guidelines attribute is a sentence
        if 'guidelines' in attrs and not re.match('^[A-Z][^?!]*[.]$',
                                                  attrs['guidelines']):
            print(label + "The `guidelines` for property `" + name +
                  "` is not a sentence (" + attrs['guidelines'] + ")")

        # Check that property contains type attribute
        if 'type' not in attrs:
            print(label + "Keyword `type` missing from property `" + name +
                  "`")
        else:
            prop_type = attrs['type']

            # Check that 'type' attribute is set to one of the valid JSON
            # types
            if prop_type not in [
                    "string", "number", "boolean", "array", "object",
                    "integer"
            ]:
                # str() so a non-string type (e.g. a list of types) is
                # reported instead of raising TypeError.
                print(label + "Type `" + str(prop_type) +
                      "` is not a valid JSON type")

            if prop_type == "array":
                # Check that property of type array also contains the
                # attribute items
                if 'items' not in attrs:
                    print(label + "Property `" + name +
                          "` is type array but doesn't contain items")
                # Check that items has either the type or $ref attribute
                elif ('$ref' not in attrs['items'].keys()
                      and 'type' not in attrs['items'].keys()):
                    print(
                        label + "Property `" + name +
                        "` is type array but items attribute doesn't contain type or $ref attribute"
                    )

            # Check that property of type object also contains the attribute
            # $ref
            if prop_type == "object" and '$ref' not in attrs:
                print(label + "Property `" + name +
                      "` is type object but doesn't contain $ref")

        # Example checks are skipped for system-supplied fields and for
        # links.json / provenance.json
        if (name not in system_supplied_properties
                and schema_filename not in unlinted_schemas):
            if 'example' not in attrs:
                # Check that property contains example attribute
                # Except when importing module ($ref)
                if 'items' in attrs:
                    imports_module = '$ref' in attrs['items'].keys()
                else:
                    imports_module = '$ref' in attrs
                if not imports_module:
                    print(label + "Keyword `example` missing from property `" +
                          name + "`")
            else:
                # Check that there are 1 or 2 examples separated by semicolon
                # Excludes enums that list all valid values (Should be one of)
                example_text = str(attrs['example'])
                if not re.match("^Should be one of", example_text):
                    ex = example_text.split(";")
                    if len(ex) == 1 and re.search(",", ex[0]):
                        print(
                            label + "Property `" + name +
                            "` might have multiple examples that aren't separated by a semicolon ("
                            + example_text + ")")

        # Check that _unit properties having matching property w/o unit
        if re.match("^[a-z_]+_unit$", name):
            # Strip only the trailing suffix, so a name that contains
            # "_unit" more than once still maps to the right base property.
            base_name = name.rsplit("_unit", 1)[0]
            if base_name not in properties:
                print(label + "Has unit property `" + name +
                      "` but no corresponding `" + base_name + "` property")

        for kw in attrs.keys():
            if name == 'ontology' and kw == 'graph_restriction':
                for nkw in attrs[kw].keys():
                    if nkw not in ontology_keywords:
                        print(
                            "Keyword `" + nkw +
                            "` is not in the list of acceptable ontology keyword properties"
                        )
            elif kw not in property_keywords:
                print(
                    "Keyword `" + kw + "` in property `" + name +
                    "` is not in the list of acceptable keyword properties")

            # Keywords nested one level down (e.g. inside `items`) must come
            # from the same allowed list
            if isinstance(attrs[kw], dict) and name != 'ontology':
                for nkw in attrs[kw].keys():
                    if nkw not in property_keywords:
                        print(
                            "Keyword `" + nkw + "` in property `" + name +
                            "` is not in the list of acceptable keyword properties"
                        )