def handle_similar_tools(tool, tool_label):
    """Cross-link ``tool`` with ontology entries whose cleaned label is identical.

    ``tool_label`` is passed through ``Preprocessor.remove_bracket_content``;
    the ontology is then searched for ``prefLabel`` values starting with the
    result.  Every hit whose first ``prefLabel``, cleaned the same way, equals
    that text is linked to ``tool`` in both directions through ``closeMatch``.
    """
    base_label = Preprocessor.remove_bracket_content(tool_label)
    # Prefix search first; the exact comparison on the cleaned label is below.
    for candidate in onto.search(prefLabel=base_label + '*'):
        candidate_label = Preprocessor.remove_bracket_content(
            candidate.prefLabel[0])
        if candidate_label != base_label:
            continue
        tool.closeMatch.append(candidate)
        candidate.closeMatch.append(tool)
def get_task_type(full_name):
    """Return the task type found in the last parenthesised group of ``full_name``.

    A group is an opening parenthesis, one or more of the characters
    ``a-z A-Z 0-9 * - '`` or space, and a closing parenthesis.  The last such
    group is handed to ``Preprocessor.remove_parenthesis`` and the result is
    returned.

    Raises:
        IndexError: if ``full_name`` contains no such group.
    """
    # Raw string: "\(" and "\-" are invalid escapes in a normal literal.
    task_types = re.findall(r"\([a-zA-Z0-9*\-' ]+\)", full_name)
    if not task_types:
        raise IndexError(
            "no parenthesised task type found in %r" % (full_name,))
    # With a single match the first and last element are the same, so one
    # lookup covers both branches of the previous if/else.
    last_group = task_types[-1]
    return Preprocessor.remove_parenthesis(last_group)
def map_to_owl(json_data):
    """Map every TauDEM tool record in ``json_data`` onto ontology individuals.

    For each record a tool individual is created, attached to ``cyber.TauDEM``
    and linked to domain concepts built from its description and name.  Items
    of the ``parameters`` / ``options`` lists become ``TauDEMInput``,
    ``TauDEMOutput`` or ``TauDEMOption`` individuals depending on their
    ``isInputFile`` / ``isOutputFile`` values; every other non-empty field is
    stored via ``OWLUtils.set_data_property``.  Finally the task and the
    application category are attached.
    """
    for record in json_data:
        raw_name = record['name']
        # instance
        name = Preprocessor.name_underline(raw_name)
        toolClass = tool_class(raw_name)
        tool = toolClass(name, prefLabel=locstr(raw_name, lang='en'))
        tool.isToolOfSoftware.append(cyber.TauDEM)
        tool.identifier = name

        description = OWLUtils.join_list(record['description'])
        keywords = OWLUtils.to_keywords(description)
        keywords.extend(name.replace('_', ' ').split(' '))
        OWLUtils.link_to_domain_concept(tool, keywords)

        for key, value in record.items():
            # outer level: parameter lists
            if key in ('parameters', 'options') and type(value) is list:
                for item in value:
                    local_name = Preprocessor.space_2_underline(
                        item['parameterName'])
                    label = local_name.replace('_', ' ')
                    # (class, tool collection, marker attribute set to True)
                    if item['isInputFile']:
                        kind = (TauDEMInput, 'input', 'isInput')
                    elif item['isOutputFile']:
                        kind = (TauDEMOutput, 'output', 'isOutput')
                    else:
                        kind = (TauDEMOption, 'option', None)
                    param_cls, collection, marker = kind

                    param = param_cls(local_name,
                                      prefLabel=locstr(label, lang='en'))
                    getattr(tool, collection).append(param)
                    handle_params(param, item)
                    if marker is not None:
                        setattr(param, marker, True)
                    OWLUtils.link_to_domain_concept(param, label)
                    param.identifier = local_name
                continue

            # Any other field: the property is resolved before the emptiness
            # check, exactly as before.
            prop = get_property(key, DataProperty)
            if not value:
                continue
            if type(value) is list:
                value = ''.join(value)
            OWLUtils.set_data_property(tool, prop, value)

        # task
        handle_task(tool, name, raw_name, description)
        OWLUtils.application_category(tool, 'Geomorphometry',
                                      'digital terrain analysis', 'hydrology')
def map_to_owl(json_data):
    """Map every SAGA GIS tool record in ``json_data`` onto ontology individuals.

    For each record:

    * if an entity with the same name and the same executable already exists
      in ``onto``, the tool class is only added to its ``is_a`` list;
    * if the name exists with a different executable, the first keyword is
      appended to the name to make it distinct;
    * otherwise a new tool individual is created and filled with manual URL,
      task, executable, command line, authors, references, description,
      domain-concept links, application category and its inputs, outputs and
      options.

    The record itself is not modified: the tool name is appended to a copy of
    ``d['keywords']``, so the application categories are taken from the
    keywords as they were supplied.
    """
    for d in json_data:
        name = Preprocessor.toolname_underline(d['name'])
        executable = Preprocessor.normalize("saga_cmd ", d['command']['exec'])
        categories = d['keywords']
        # Copy: the tool name is appended further down for concept linking and
        # must not leak into d['keywords'] (used for the application category).
        keywords = list(categories)
        toolClass = tool_class(keywords)

        existing = onto[name]
        if existing:
            # Same name and same executable: just add the class.
            if existing.executable == executable:
                existing.is_a.append(toolClass)
                continue
            name = name + '_' + keywords[0].replace(' ', '_')

        pref_label = re.sub('^(Tool)[0-9: ]+', '', d['name'])
        tool = toolClass(name, prefLabel=locstr(pref_label, lang='en'))
        tool.isToolOfSoftware.append(cyber.SAGA_GIS)
        tool.identifier = name
        tool.manualPageURL.append(d['manual_url'])

        # task
        handle_task(tool, name, d['name'], keywords,
                    OWLUtils.join_list(d['description']))

        tool.executable = executable
        tool.commandLine.append(
            Preprocessor.normalize("Usage: ", d['command']['cmd_line']))
        tool.authors.append(OWLUtils.join_keywords(d['authors']))
        for reference in d['references']:
            tool.references.append(reference)

        # keywords
        keywords.append(name.replace('_', ' '))
        OWLUtils.link_to_domain_concept(tool, keywords)

        # application category
        OWLUtils.application_category(tool, [categories[0]], categories[1],
                                      categories[2:])
        tool.description.append(OWLUtils.join_list(d['description']))

        parameters = d['parameters']
        if not parameters:
            continue
        for item, itemValue in parameters.items():
            if item == 'inputs':
                handle_inout(tool, itemValue, 'input')
            elif item == 'outputs':
                handle_inout(tool, itemValue, 'output')
            elif item == 'options':
                for optionItem in itemValue:
                    handle_options(tool, optionItem, onto)
def clean(_preflabel):
    """Derive an identifier-style label and alternative labels from a prefLabel.

    Steps:

    1. Take ``_preflabel[0]``, drop parenthesised groups made of letters,
       digits, ``/``, ``.``, ``,`` and spaces, lower-case and strip it.
    2. Replace ``>`` and ``<`` by words.
    3. If the label contains ``/``, use its last segment.  When that segment
       is one of the excluded short names, use the whole label with ``/``
       turned into ``_`` and report the leading segments as alternative
       labels.
    4. Collapse runs of spaces, hyphens and underscores through
       ``Preprocessor.replace_2_underline`` and print the result.

    Args:
        _preflabel: indexable whose first element is the label string.

    Returns:
        tuple ``(newlabel, altLabels)``.
    """
    # Trailing segments that are not used as a label on their own
    # (original note: "too many items").
    exclude = {
        'nasa', 'doi', 'doc', 'usda', 'ca', 'epa', 'jp', 'doe', 'uwa', 'r',
        'eu', 'de', 'dhhs', 'dmsp'
    }

    oldlabel = re.sub(r"\([a-z/.,A-Z0-9 ]+\)", '', _preflabel[0])
    oldlabel = oldlabel.lower().strip()
    # Both comparison signs are replaced; the previous if/elif left '<' in
    # place whenever '>' was present too.  The spelling 'grater_than' is kept
    # unchanged because it ends up in the generated label.
    oldlabel = oldlabel.replace('>', 'grater_than').replace('<', 'less_than')

    altLabels = []
    newlabel = oldlabel
    if '/' in oldlabel:
        allLabels = oldlabel.split('/')
        newlabel = allLabels[-1]
        if newlabel in exclude:
            newlabel = oldlabel.replace('/', '_')
            altLabels = allLabels[:-1]

    # The hyphen is escaped: the previous class '[ -_]' was the range from
    # space to underscore, which also matches digits and most punctuation.
    newlabel = Preprocessor.replace_2_underline(r'[ \-_]+', newlabel).strip()
    print(newlabel)
    return newlabel, altLabels
def handle_inout(tool, item_value, in_or_out):
    """Create input or output parameter individuals for a SAGA tool.

    Args:
        tool: tool individual; new parameters are appended to ``tool.input``
            or ``tool.output``.
        item_value: iterable of dicts read with the keys ``name``,
            ``dataType``, ``description``, ``flag`` and ``isOptional``.
        in_or_out: ``'input'`` creates ``SagaInput`` individuals; any other
            value creates ``SagaOutput`` individuals.  It is also used as the
            parameter name when an item's ``name`` is ``None``.

    Besides the ontology individuals, SHACL-style triples are added to the
    module-level graph ``g``.
    """
    for ioD in item_value:
        # print(ioD)
        io_name = ioD['name']
        if io_name is None:
            io_name = in_or_out
        _name = Preprocessor.io_name(io_name, onto)
        param_rdf = None
        if in_or_out == 'input':
            param = SagaInput(_name, prefLabel=locstr(io_name, lang='en'))
            # param = SagaInput(0,prefLabel=locstr(io_name, lang='en'))  # blank node prefix with _:
            tool.input.append(param)
            param.isInput = True
            # rdflib: type the parameter node as a NodeShape and as SagaInput
            param_rdf = URIRef(param.iri)
            with onto:
                g.add((param_rdf, RDF.type, Sh.NodeShape))
                g.add((param_rdf, RDF.type, URIRef(SagaInput.iri)))
        else:
            param = SagaOutput(_name, prefLabel=locstr(io_name, lang='en'))
            # param =SagaOutput(0, prefLabel=locstr(io_name, lang='en'))
            tool.output.append(param)
            param.isOutput = True
            # rdflib: type the parameter node as a NodeShape and as SagaOutput
            param_rdf = URIRef(param.iri)
            with onto:
                g.add((param_rdf, RDF.type, Sh.NodeShape))
                g.add((param_rdf, RDF.type, URIRef(SagaOutput.iri)))
        if ioD['dataType']:
            # Leading run of letters/spaces of the data type string.
            # NOTE(review): re.match returns None when the pattern does not
            # match, in which case .group() below raises AttributeError.
            vr = re.match("[a-zA-Z ]+ (?=\([a-zA-Z ]+\))?", ioD['dataType'])
            dformat = vr.group().strip()
            # NOTE(review): this `continue` also skips the identifier,
            # description, flag and isOptional assignments further down for
            # parameters whose format is not recognised - confirm intended.
            if not get_format(dformat):
                continue
            param.supportsDataFormat.append(data[get_format(dformat)])
            # rdflib: property shape restricting supportsDataFormat
            formatshape = g.BNode()
            with onto:
                g.add((param_rdf, Sh.property, formatshape))
                g.add((formatshape, RDF.type, Sh.PropertyShape))
                g.add((formatshape, Sh.path, Cyber.supportsDataFormat))
            # one-element RDF list holding the supported format
            formats = g.BNode()
            with onto:
                g.add((formats, RDF.first, [data[get_format(dformat)]]))
                g.add((formats, RDF.rest, RDF.nil))
                c = Collection(g, formats)
                g.add((formatshape, Sh['in'], c))
        # The identifier uses the raw name (may be None), not the io_name
        # fallback chosen above.
        param.identifier = ioD['name']
        param.description.append(ioD['description'])
        param.flag = ioD['flag']
        param.isOptional = ioD['isOptional']
        OWLUtils.link_to_domain_concept(param, io_name.replace('_', ' '))
        # shacl: property shape on dataContent; required parameters get
        # minCount 1 and a message.  The shape is not linked to `param` in
        # this function.
        pshape = Sh.PropertyShape(0)
        pshape.path = onto.dataContent
        if not ioD['isOptional']:
            pshape.minCount = 1
            pshape.message.append(ioD['name'] + " is required!")
def topic_classes():
    """Create topic-category classes from the ``application`` config section.

    Each option key of the section is converted with
    ``Preprocessor.to_upper_camel_case`` and turned into an ontology class
    below ``TopicCategoryTool``.  The option value is a comma-separated list
    of entity names; every name that resolves in ``onto`` gets the new class
    appended to its ``is_a`` list.
    """
    config = OWLUtils.get_config(module_path + '/config.ini')
    for option_key, tool_csv in config.items('application'):
        class_name = Preprocessor.to_upper_camel_case(option_key, True)
        topic_cls = OWLUtils.create_onto_class(onto, class_name,
                                               TopicCategoryTool)
        for tool_name in (part.strip() for part in tool_csv.split(',')):
            entity = onto[tool_name]
            if entity is None:
                # name listed in the config but unknown to the ontology
                continue
            entity.is_a.append(topic_cls)
def handle_options(tool, option, _onto):
    """Create a ``SagaOption`` individual for ``option`` and attach it to ``tool``.

    Args:
        tool: tool individual; the option is appended to ``tool.option``.
        option: dict read with the keys ``name``, ``description``, ``flag``,
            ``dataType`` and ``constraints``.
        _onto: ontology used for naming, for ``dataContent`` and for the
            choice individuals.

    ``constraints`` may be missing (falsy); it is then treated as an empty
    mapping, so the lookups below no longer fail on ``None``.
    """
    name = option['name']
    if name is None:
        name = 'option'
    _name = Preprocessor.io_name(Preprocessor.name_underline(name), _onto)
    op = SagaOption(_name, prefLabel=locstr(name, lang='en'))
    tool.option.append(op)
    # '-' is the placeholder for "no description"
    if option['description'] != '-':
        op.description = option['description']
    op.flag = option['flag']
    op.identifier = name

    constraints = option['constraints'] or {}

    # shacl
    pshape = Sh.PropertyShape(0)
    pshape.path.append(_onto.dataContent)

    fields_des = constraints.get('fields_des')
    if fields_des:
        op.description.append(fields_des)
    else:
        # NOTE(review): truthiness checks skip falsy values such as 0 - this
        # is unchanged; confirm whether numeric zero bounds can occur.
        minimum = constraints.get('minimum')
        if minimum:
            op.minimum = minimum
            pshape.minExclusive = minimum
        default_value = constraints.get('defaultValue')
        if default_value:
            op.defaultValue = default_value
            pshape.defaultValue = default_value
        maximum = constraints.get('maximum')
        if maximum:
            op.maximum = maximum
            pshape.maxInclusive = maximum

    data_type = option['dataType']
    datatype_iri = OWLUtils.get_datatype_iris(data_type)
    op.datatypeInString.append(data_type)
    pshape.datatype = [datatype_iri]
    op.datatype.append(datatype_iri)

    choices = constraints.get('availableChoices')
    if choices:
        c = [achoice['choice'] for achoice in choices]
        with _onto:
            g.add((pshape, Sh['in'], c))
        OWLUtils.handle_choices(op, name, choices, SagaAvailableChoice, _onto)
def gen_shacl(json_data):
    """Add a shape node to ``graph`` for each tool that declares options.

    For every record the tool name is normalised, and for each entry under
    ``parameters['options']`` the triples ``(<name>Shape, rdf:type,
    sh.ShapeNode)`` and ``(<name>Shape, sh.TargetNode, <name>)`` are added.
    The triples depend only on the tool name, not on the individual option.
    """
    for d in json_data:
        name = Preprocessor.toolname_underline(d['name'])
        name = re.sub("[()-*,/]_", " ", name).strip()
        parameters = d['parameters']
        if not parameters:
            continue
        for section, entries in parameters.items():
            if section != 'options':
                continue
            for _option in entries:
                shape = URIRef(saga_uri + name.replace(" ", '') + 'Shape')
                target = URIRef(saga_uri + name)
                graph.add((shape, RDF.type, sh.ShapeNode))
                graph.add((shape, sh.TargetNode, target))
                # created as before; nothing in this function uses it yet
                shapeGraph = BNode()
def handle_parameters(tool, param, _onto):
    """Create a GRASS input, output or option individual for ``param``.

    Args:
        tool: tool individual receiving the parameter in ``input``,
            ``output`` or ``option``.
        param: dict read with the keys ``parameter``, ``flag``,
            ``explanation`` and ``isOptional``; ``isInputFile``,
            ``isOutputFile``, ``dataType``, ``defaultValue`` and
            ``alternatives`` are optional.
        _onto: ontology used for naming and for building RDF lists.

    ``alternatives`` handling: several entries become an RDF list plus
    individual ``availableValue`` entries; a single entry of the form
    ``<min>-<max>`` sets ``minimum`` and ``maximum``; a single entry of the
    form ``<min>-<text>`` sets ``minimum`` only.
    """
    # Some parameters do not carry isInputFile and similar attributes.
    name = param['parameter']
    _name = Preprocessor.io_name(name, _onto, ['overwrite'])
    if param.get('isInputFile'):
        p = GrassInput(_name, prefLabel=locstr(name, lang='en'))
        tool.input.append(p)
        p.isInput = param['isInputFile']
        OWLUtils.link_to_domain_concept(p, name.replace('_', ' '))
    elif param.get('isOutputFile'):
        p = GrassOutput(_name, prefLabel=locstr(name, lang='en'))
        tool.output.append(p)
        p.isOutput = param['isOutputFile']
        OWLUtils.link_to_domain_concept(p, name.replace('_', ' '))
    else:
        p = GrassOption(_name, prefLabel=locstr(name, lang='en'))
        tool.option.append(p)

    p.flag = param['flag']
    p.identifier = name
    if 'dataType' in param:
        p.datatypeInString.append(param['dataType'])
        p.datatype.append(OWLUtils.get_datatype_iris(param['dataType']))
    p.description.append(param['explanation'])
    if param.get('defaultValue') is not None:
        p.defaultValue = param['defaultValue']
    p.isOptional = param['isOptional']

    alternatives = param.get('alternatives')
    if not alternatives:
        return

    # Module names are resources, everything else is a literal.
    literal = param['explanation'] != "Name of Modules"
    if len(alternatives) > 1:
        _onto, rdf_list = OWLUtils.resources_2_rdf_list(
            _onto, alternatives, literal)
        p.availableList.append(rdf_list)
        for value in alternatives:
            p.availableValue.append(value)
    else:
        only = alternatives[0]
        if re.match('[-0-9]+-[0-9]+', only):
            lower, upper = only.rsplit('-', 1)
            p.minimum = lower
            p.maximum = upper
        else:
            # Capture the lower bound with the pattern itself so that a
            # negative value such as "-5-inf" yields "-5"; split('-')[0]
            # returned an empty string for it.
            open_range = re.match('([-0-9]+)-[a-zA-Z<> ]+', only)
            if open_range:
                p.minimum = open_range.group(1)
    p.comment.append('Options: ' + ' '.join(alternatives))
def handle_task(tool, task_name, des):
    """Link ``tool`` to its task individual, creating the task if necessary.

    The task class name is read from the ``task`` section of ``config.ini``
    under the name of the class returned by ``tool_class(task_name)``.  The
    task individual is named ``<task_name>_task``, where ``task_name`` has
    ``".py"`` removed and is passed through
    ``Preprocessor.space_2_underline``.

    Args:
        tool: tool individual to link.
        task_name: tool name the task is derived from.
        des: English description appended to the task.
    """
    config = OWLUtils.get_config(module_path + '/config.ini')
    category = tool_class(task_name)
    task_cls = config.get('task', category.name)
    task_name = Preprocessor.space_2_underline(task_name.replace(".py", ""))

    task_id = task_name + "_task"
    task_ins = task[task_id]
    if not task_ins:
        task_ins = task[task_cls](
            task_id, prefLabel=locstr(task_name + " task", lang='en'))
        task_ins.isAtomicTask = True
        task_ins.identifier = task_name

    if task_ins not in tool.usedByTask:
        tool.usedByTask.append(task_ins)
    # Check the task's own list: testing `tool.processingTool` (as before)
    # never detected an existing link, so the tool could be added repeatedly.
    if tool not in task_ins.processingTool:
        task_ins.processingTool.append(tool)
    task_ins.description.append(locstr(des, lang='en'))
def handle_options(tool, param, _onto):
    """Create a ``GDALOption`` individual for ``param`` and append it to ``tool.option``.

    Args:
        tool: tool individual receiving the option.
        param: dict read with the keys ``name``, ``flag``, ``isOptional``,
            ``explanation`` and ``dataType``; ``available_values``,
            ``available_choices`` and ``input_pattern`` are optional.
        _onto: ontology passed to the naming and list/choice helpers.
    """
    pname = param['name']
    option_id = Preprocessor.io_name(pname, _onto, common_options)
    option = GDALOption(option_id, prefLabel=locstr(pname, lang='en'))
    option.identifier = pname

    flag = param['flag']
    if flag:
        option.flag = flag
    option.isOptional = param['isOptional']
    option.description.append(locstr(param['explanation'], lang='en'))
    option.datatype.append(OWLUtils.get_datatype_iris(param['dataType']))

    if "available_values" in param:
        values = param['available_values']
        for value in values:
            option.availableValue.append(value)
        # the ontology returned by the helper is not used here
        _, rdf_list = OWLUtils.resources_2_rdf_list(_onto, values)
        option.availableList.append(rdf_list)

    if "available_choices" in param:
        # the helper hands back the (possibly replaced) option individual
        option, _ = OWLUtils.handle_choices(
            option, pname, param['available_choices'], GDALAvailableChoice,
            _onto)

    if "input_pattern" in param:
        option.inputPattern.append(param['input_pattern'])

    tool.option.append(option)
def handle_parameter(tool, param):
    """Create a GDAL input or output individual for ``param`` and attach it to ``tool``.

    The kind is decided by key presence only: a param containing
    ``isInputFile`` becomes a ``GDALInput``; otherwise one containing
    ``isOutputFile`` becomes a ``GDALOutput``.
    NOTE(review): the values of these keys are not inspected - confirm that
    the data never carries ``isInputFile: false``.

    Args:
        tool: tool individual receiving the parameter.
        param: dict read with the keys ``name``, ``flag``, ``isOptional``,
            ``explanation`` and ``dataType``.

    Raises:
        ValueError: if ``param`` has neither ``isInputFile`` nor
            ``isOutputFile`` (previously this surfaced as an AttributeError
            on ``None``).
    """
    pname = param['name']
    is_input = 'isInputFile' in param
    if not is_input and 'isOutputFile' not in param:
        raise ValueError(
            "parameter %r is neither an input nor an output file" % (pname,))

    _name = Preprocessor.io_name(pname, onto, common_options)
    if is_input:
        p = GDALInput(_name, prefLabel=locstr(pname, lang='en'))
        p.isInput = True
        tool.input.append(p)
    else:
        p = GDALOutput(_name, prefLabel=locstr(pname, lang='en'))
        p.isOutput = True
        tool.output.append(p)
    OWLUtils.link_to_domain_concept(p, pname.replace('_', ' '))

    p.identifier = pname
    if param['flag']:
        p.flag = param['flag']
    p.isOptional = param['isOptional']
    p.description.append(locstr(param['explanation'], lang='en'))
    p.datatype.append(OWLUtils.get_datatype_iris(param['dataType']))
def map_to_owl(json_data):
    """Map every Whitebox Tools record in ``json_data`` onto ontology individuals.

    Each record is read with the keys ``category``, ``title``,
    ``description``, ``parameter_commandline`` and ``parameter``.  A tool
    individual is created in the class chosen by ``tool_class`` for the
    category, filled with its metadata, linked to domain concepts, its task
    and its application category, and each entry of ``parameter`` is passed
    to ``handle_parameter``.
    """
    manual_url = 'https://github.com/jblindsay/whitebox-tools/blob/master/manual/WhiteboxToolsManual.md'
    for d in json_data:
        category = d['category']
        title = d['title']
        description = d['description']

        toolClass = tool_class(category)
        name = Preprocessor.toolname_underline(title)
        tool = toolClass(name, prefLabel=locstr(title, lang='en'))
        tool.isToolOfSoftware.append(cyber.Whitebox_Tools)
        tool.identifier = name
        tool.manualPageURL.append(manual_url)
        tool.executable = 'whitebox_tools'
        tool.commandLine.append(d['parameter_commandline'][0])
        tool.description.append(locstr(description, lang='en'))

        # keywords: description keywords plus the words of the title
        keywords = OWLUtils.to_keywords(description)
        keywords.extend(title.split(" "))
        OWLUtils.link_to_domain_concept(tool, keywords)

        handle_task(tool, category, title, description)
        OWLUtils.application_category(tool, [],
                                      category.replace(' Tools', ''), [])

        for parameter in d['parameter']:
            handle_parameter(tool, parameter)
def handle_parameters(tool, param):
    """Create an ArcGIS input, output or option individual for ``param``.

    Args:
        tool: tool individual receiving the parameter in ``input``,
            ``output`` or ``option``.
        param: dict read with the keys ``name``, ``description`` and
            ``isOptional``; ``isInputFile``, ``isOutputFile``, ``dataType``
            and ``available_values`` are optional.

    A missing or ``None`` ``dataType`` is treated as ``"string"``.  The
    normalised data type (stripped, lower-cased, spaces as underscores) is
    looked up in ``data`` first and resolved through
    ``OWLUtils.get_datatype_iris`` when that lookup gives ``None``.
    """
    # Some parameters do not carry isInputFile and similar attributes.
    name = param['name']
    # .get(): the key is optional - other checks here test for its presence.
    raw_datatype = param.get('dataType')
    _name = Preprocessor.io_name(name, onto)
    concept_text = name.replace('_', ' ')

    if param.get('isInputFile'):
        p = ArcGISInput(_name, prefLabel=locstr(name, lang='en'))
        tool.input.append(p)
        p.isInput = param['isInputFile']
        OWLUtils.link_to_domain_concept(p, concept_text)
    elif param.get('isOutputFile'):
        p = ArcGISOutput(_name, prefLabel=locstr(name, lang='en'))
        tool.output.append(p)
        p.isOutput = param['isOutputFile']
        OWLUtils.link_to_domain_concept(p, concept_text)
    else:
        p = ArcGISOption(_name, prefLabel=locstr(name, lang='en'))
        tool.option.append(p)
        if raw_datatype:
            # Options also record the IRI of the un-normalised data type.
            # NOTE(review): together with the normalised entry added below
            # this gives options two datatype values - confirm intended.
            p.datatype.append(OWLUtils.get_datatype_iris(raw_datatype))
        OWLUtils.link_to_domain_concept(p, concept_text)

    p.identifier = name
    p.flag = name
    # Recorded once for every kind; options used to get this value twice.
    if raw_datatype:
        p.datatypeInString.append(raw_datatype)
    p.description.append(param['description'])
    p.isOptional = param['isOptional']

    # datatype
    datatype = "string" if raw_datatype is None else raw_datatype
    dt = datatype.strip().lower().replace(' ', '_')
    dtype = data[dt]
    if dtype is None:
        dtype = OWLUtils.get_datatype_iris(dt)
    p.datatype.append(dtype)

    if "available_values" in param:
        for value in param['available_values']:
            p.availableValue.append(value)
def map_to_owl(json_data):
    """Map every GDAL program record in ``json_data`` onto ontology individuals.

    Each record is read with the keys ``name``, ``summary``, ``manual_url``,
    ``exec``, ``syntax``, ``description``, ``example``, ``parameters`` and
    ``options``.  A tool individual is created and filled with its metadata,
    application category, domain-concept links, examples and task; entries of
    ``parameters`` go to ``handle_parameter`` and entries of ``options`` to
    ``handle_options``.
    """
    for d in json_data:
        raw_name = d['name']
        description = d['description']

        toolClass = tool_class(raw_name)
        name = Preprocessor.space_2_underline(raw_name)
        tool = toolClass(name, prefLabel=locstr(name, lang='en'))
        tool.isToolOfSoftware.append(cyber.GDAL)
        tool.identifier = name
        tool.definition = d['summary']
        tool.manualPageURL.append(d['manual_url'])
        tool.executable = d['exec']
        tool.commandLine.append(d['syntax'])
        tool.description.append(locstr(description, lang='en'))

        # third-level category: the tool class name without "Programs"
        group = str(toolClass.name).replace('Programs', '')
        OWLUtils.application_category(tool, [], ['GIS Analysis'], [group])

        keywords = OWLUtils.to_keywords(description)
        OWLUtils.link_to_domain_concept(tool, keywords)

        for example in d['example']:
            tool.example.append(example)

        handle_task(tool, raw_name, description)

        for parameter in d['parameters']:
            handle_parameter(tool, parameter)
        for option in d['options']:
            handle_options(tool, option, onto)
def handle_task(tool, tool_name, en_str, _keywords, desc):
    """Link ``tool`` to task individuals selected by its keywords.

    Every option name of the ``task`` section in ``config.ini`` that occurs
    in ``_keywords`` selects a task class (the option's value).  The task
    individual is named by ``Preprocessor.task_name(tool_name)``; it is
    created on first use and reused afterwards.

    Args:
        tool: tool individual to link.
        tool_name: name the task individual's name is derived from.
        en_str: English tool label; ``'Tool'`` is removed and ``" task"``
            appended to form the task's ``prefLabel``.
        _keywords: container of keywords tested against the option names.
        desc: English description stored on a newly created task.
    """
    config = OWLUtils.get_config(module_path + '/config.ini')
    for task_item in config.options('task'):
        if task_item not in _keywords:
            continue
        task_cls = config.get('task', task_item)
        task_name = Preprocessor.task_name(tool_name)

        task_ins = task[task_name]
        if task_ins is None:
            task_ins = task[task_cls](
                task_name,
                prefLabel=locstr(en_str.replace('Tool', '') + " task",
                                 lang='en'))
            task_ins.description.append(locstr(desc, lang='en'))
            task_ins.isAtomicTask = True
            task_ins.identifier = task_name

        if task_ins not in tool.usedByTask:
            tool.usedByTask.append(task_ins)
        # Check the task's own list: testing `tool.processingTool` (as
        # before) never detected an existing link, so the tool could be
        # added repeatedly.
        if tool not in task_ins.processingTool:
            task_ins.processingTool.append(tool)
def handle_parameter(tool, param):
    """Create a Whitebox input, output or option individual for ``param``.

    The kind comes from ``param_type(param['description'])``: ``1`` gives a
    ``WhiteboxInput``, ``2`` a ``WhiteboxOutput``, ``3`` a ``WhiteboxOption``
    marked optional; any other value gives a ``WhiteboxOption`` whose
    description is handed to ``avaliable_choices``.  The parameter name is
    the long flag when present, otherwise the flag, with ``'--'`` removed.

    Args:
        tool: tool individual; its first ``commandLine`` entry is used to
            determine the data format.
        param: dict read with the keys ``flag`` and ``description``;
            ``flag_long`` is optional.
    """
    flag = param['flag']
    description = param['description']
    dformat = file_type(flag, tool.commandLine[0])
    ptype = param_type(description)

    has_long_flag = 'flag_long' in param
    pname = (param['flag_long'] if has_long_flag else flag).replace('--', '')
    _name = Preprocessor.io_name(pname, onto)

    if ptype in (1, 2, 3):
        # (tool collection, attribute set to True) per parameter type
        collection, marker = {
            1: ('input', 'isInput'),
            2: ('output', 'isOutput'),
            3: ('option', 'isOptional'),
        }[ptype]
        if ptype == 1:
            p = WhiteboxInput(_name, prefLabel=locstr(pname, lang='en'))
        elif ptype == 2:
            p = WhiteboxOutput(_name, prefLabel=locstr(pname, lang='en'))
        else:
            p = WhiteboxOption(_name, prefLabel=locstr(pname, lang='en'))
        getattr(tool, collection).append(p)
        setattr(p, marker, True)
        OWLUtils.link_to_domain_concept(p, pname.replace('_', ' '))
    else:
        p = WhiteboxOption(_name, prefLabel=locstr(pname, lang='en'))
        tool.option.append(p)
        avaliable_choices(p, description)

    if dformat:
        p.supportsDataFormat = dformat
    p.identifier = pname
    p.flag = flag
    if has_long_flag:
        p.longFlag.append(param['flag_long'])
    p.description.append(locstr(description, lang='en'))