class McStasComponentParser(object): def __init__(self, filename=None, config=None, parse=True): self._filename = filename self._config = config # OrderedDict? # Header self._headerstr = "" # Non-parsed header self._header = OrderedDict() # Parsed header self._inputparams = OrderedDict() # Dictionary of input parameters self._outputparams = OrderedDict() # Dictionary of output parameters # Body self._sections = {} # Sections self._defs = {} # Definitions if parse and (self._fileExists() or config): self.parse() def parse(self): """ Parses data from config string or file and populates header structure """ configText = self._configText() bodyText = self._parseHeader(configText) # Parse header self._parseBody(bodyText) def header(self): "Returns header" return self._header def sections(self): "Returns sections" return self._sections def definitions(self): "Returns definitions" return self._defs def inputparams(self): "Returns input parameters" return self._inputparams def outputparams(self): "Returns output parameters" return self._outputparams def toString(self, br="\n"): str = "" for (key, value) in self._header.iteritems(): str += "%s: %s%s" % (key, value, br) str += br for (key, value) in self._sections.iteritems(): str += "%s: %s%s" % (key, value, br) str += br for (key, value) in self._defs.iteritems(): str += "%s: %s%s" % (key, value, br) return str def _parseHeader(self, origText): "Parses header and populates header dictionary" p = re.compile(COMMENT, re.DOTALL) matches = p.findall(origText) if len(matches) < 1: # No header found return origText m = matches[0] # First comment is the header self._headerstr = m text = self._strip(WINCR, m) # Strip carriage return headertext = self._strip(STAR, text) # Strip stars # Extract sections from headertext (hide them?) info = self._sectionText(INFO_SEC, headertext) desc = self._sectionText(DESC_SEC, headertext) param = self._sectionText(PARAM_SEC, headertext) self._parseCompName(headertext) self._parseInfoSection(info) self._parseDescSection(desc) self._parseParamSection(param) # Find end position of the header end = self._headerEnd(origText) return origText[end:] def _parseBody(self, bodyText): "Parses body and populates body dictionary" bodytext = self._cleanupText(bodyText) self._parseDefComp(bodytext) self._parseDefParams(bodytext) self._parseSetParams(bodytext) self._parseOutParams(bodytext) self._parseStateParams(bodytext) self._parsePolParams(bodytext) self._parseBodySections(bodytext) def _cleanupText(self, text): "Cleans up text" temptext = self._strip(WINCR, text) # Strip carriage return temptext = self._strip(COMMENT, temptext) # Strip C comments (/*...*/) # Don't strip C++ comments as it make Jiao's hack die :) #temptext = self._strip(CPP_COMMENT, temptext)# Strip C++ comments (//...) return temptext def _parseDefComp(self, text): "Parses Define Component" name = "" value = self._defValues(DEF_COMP, text) if value: name = value self._defs["name"] = name def _parseDefParams(self, text): "Parses and sets arameters" self._setDefParams(DEF_PARAMS, text, "definition_parameters") def _setDefParams(self, regex, text, paramname): "Parses and parameters and" params = [] value = self._defValues(regex, text) if value: params = self._defParams(value) self._defs[paramname] = params def _defParams(self, line): "Returns definition parameters as dictionary" # Format: [<type>]{spaces}<variable>{spaces}[={spaces}<value>] # Example: line = "string XX, string YY =1, ZZ , WW= 2" params = [] items = line.strip(" ()\n").split(",") for it in items: var = it.strip() # Doesn't work well #match = self._defValues(PARAM_VAR, var, None) match = self._paramMatch(var) assert len(match) == 3 if match[1] == "": # If name is empty, return empty list return [] param = {} param["type"] = match[0] param["name"] = match[1] param["value"] = match[2] params.append(param) return params def _paramMatch(self, var): """ Returns tuple: (<type>, <name>, <value>). Example: ("string", "filename", "'IE.dat'") """ type = "" name = "" value = "" if not var: return (type, name, value) parts = var.split("=") if len(parts) == 2: value = parts[1].strip() # Get value if it exists value = value.strip("\"'") # Strip quotation marks? # Catching pointer variable parts2 = parts[0].split("*") if len(parts2) == 2: type = "%s *" % parts2[0].strip() name = parts2[1].strip() return (type, name, value) # Catching non-pointer variable varparts = parts[0].split() if len(varparts) == 2: type = varparts[0].strip() name = varparts[1].strip() elif len(varparts) == 1: name = varparts[0].strip() return (type, name, value) def _parseSetParams(self, text): "Parses Setting Parameters" self._setDefParams(SET_PARAMS, text, "setting_parameters") def _parseOutParams(self, text): "Parses Output Parameters" self._setDefParams(OUT_PARAMS, text, "output_parameters") def _parseStateParams(self, text): "Parses State Parameters" self._setListParams(STATE_PARAMS, text, "state_parameters") def _parsePolParams(self, text): "Parses Polarization Parameters" self._setListParams(POL_PARAMS, text, "polarization_parameters") def _setListParams(self, regex, text, paramname): "Parses text and populates list parameters in defintion part" params = [] value = self._defValues(regex, text) if value: items = value.strip(" ()").split(",") # Strip brackets just in case for it in items: # clean up params params.append(it.strip()) self._defs[paramname] = params def _defValues(self, regex, text, flags=re.DOTALL|re.IGNORECASE|re.MULTILINE): "Returns matches for regex pattern. Used mostly for definitions" p = re.compile(regex) if flags: p = re.compile(regex, flags) matches = p.findall(text) if len(matches) < 1: # No value found return None m = matches[0] if type(m) is str: return m.strip() # If value is string, strip spaces return m # otherwise return as they are def _parseBodySections(self, text): "Parse body sections" for secname in BODY_SECTIONS: p = re.compile(sectionRegex(secname), re.DOTALL|re.IGNORECASE) matches = p.findall(text) secname = secname.lower() # Turn section name lower case if secname == FINALLY.lower(): # Special case for "FINALLY" section secname = "finalize" if len(matches) < 1: # No section found self._sections[secname] = "" continue mm = matches[0] if len(mm) != 2: # Section content is empty self._sections[secname] = "" continue self._sections[secname] = mm[1] def _configText(self): "Take config from file if it exist and readable, or use from config - otherwise" configText = "" if self._fileExists(): try: # Try to read it configText = open(self._filename).read() except: pass # No exception return configText if self._config: configText = self._config return configText # Empty string def _fileExists(self): "Checks if file exists" if self._filename and os.path.exists(self._filename): return True return False def _strip(self, regex, text): "Strips piece of text that matches regex pattern" p = re.compile(regex, re.DOTALL|re.MULTILINE) s = re.sub(p, '', text) return s def _parseCompName(self, text): p = re.compile(COMP_NAME, re.IGNORECASE) namefinds = p.findall(text) if not namefinds: return "" # Empty name compname = namefinds[0].strip() self._header["componentname"] = compname # XXX: integrate with _defValue() def _sectionText(self, secregex, text, flags=re.DOTALL): "Returns section string that matches secregex pattern" p = re.compile(secregex) if flags: p = re.compile(secregex, flags) matches = p.findall(text) if len(matches) < 1: # No section found, return empty string return "" return matches[0] # Return the first found match # XXX: Merge with _populateParams() def _parseInfoSection(self, text): "Parses info section and populates part of header parameters" # XXX: There might be problems that description has ':' character # In this case check if numbr == 2 and afterparam = True lines = text.split("\n") for l in lines: l = l.strip() if l == '': continue # Skip empty line p = re.compile(PARAM) m = p.match(l) if m: param = m.group(1).strip() value = m.group(2).strip() paramname = self._paramName(param) if not paramname: continue paramname = paramname.lower() self._header[paramname] = value else: self._header["simple_description"] = l def _paramName(self, param): """ Returns parameter name. Note: Only those parameter which are in INFO_PARAMS will be returned """ # Non standard parameter if self._isMatch(COPYRIGHT_N, param): return "copyright" # Standard parameters for regex in STD_PARAMS: if self._isMatch(regex, param): return param return None def _isMatch(self, regex, text): "Returns True if matches, False - otherwise" p = re.compile(regex, re.IGNORECASE) m = p.match(text) if m: return True # There is the match return False def _parseDescSection(self, text): "Parses description section and populates part of header parameters" # Find example p = re.compile(EXAMPLE, re.DOTALL|re.IGNORECASE) matches = p.findall(text) example = "" # Default value if len(matches) >= 1: # No section found, return empty string mstr = matches[0] # Take first match! if mstr: example = " ".join(mstr.strip(" \n").split("\n")) self._header["example"] = example # Get full description: strip example and take whatever is left text = self._strip(EXAMPLE, text) self._header["full_description"] = text.strip() def _parseParamSection(self, text): "Parses parameter section and populates input and output parameters of header" # Get output parameters first! outputtext = self._sectionText(OUTPUT_PARAMS, text, flags=re.DOTALL|re.IGNORECASE) filteredtext = self._strip(OUTPUT_PARAMS, text) # ... and then input parameters inputtext = self._sectionText(INPUT_PARAMS, filteredtext, flags=re.DOTALL|re.IGNORECASE) self._parseInputSubsection(inputtext) self._parseOutputSubsection(outputtext) def _parseInputSubsection(self, text): "Parses input text and populates input parameters" self._inputparams = self._populateParams(IOPARAM, text) self._header["input_parameters"] = self._inputparams def _parseOutputSubsection(self, text): "Parses output text and populates output parameters" self._outputparams = self._populateParams(IOPARAM, text) self._header["output_parameters"] = self._outputparams def _populateParams(self, paramregex, text): "Populates dictionary of parameters" params = {} lines = text.split("\n") for l in lines: l = l.strip() if l == '': continue # Skip empty line p = re.compile(paramregex) m = p.match(l) if m: (param, value) = (m.group(1).strip(), m.group(2).strip()) # XXX: What if value has '\n'? if not param: continue params[param] = value return params def _headerEnd(self, origText): "Returns end position of the header" p = re.compile(COMMENT, re.DOTALL) ss = p.search(origText) return ss.end()
class McStasComponentParser(object): def __init__(self, filename=None, config=None, parse=True): self._filename = filename self._config = config # OrderedDict? # Header self._headerstr = "" # Non-parsed header self._header = OrderedDict() # Parsed header self._inputparams = OrderedDict() # Dictionary of input parameters self._outputparams = OrderedDict() # Dictionary of output parameters # Body self._sections = {} # Sections self._defs = {} # Definitions if parse and (self._fileExists() or config): self.parse() def parse(self): """ Parses data from config string or file and populates header structure """ configText = self._configText() bodyText = self._parseHeader(configText) # Parse header self._parseBody(bodyText) def header(self): "Returns header" return self._header def sections(self): "Returns sections" return self._sections def definitions(self): "Returns definitions" return self._defs def inputparams(self): "Returns input parameters" return self._inputparams def outputparams(self): "Returns output parameters" return self._outputparams def toString(self, br="\n"): str = "" for (key, value) in self._header.iteritems(): str += "%s: %s%s" % (key, value, br) str += br for (key, value) in self._sections.iteritems(): str += "%s: %s%s" % (key, value, br) str += br for (key, value) in self._defs.iteritems(): str += "%s: %s%s" % (key, value, br) return str def _parseHeader(self, origText): "Parses header and populates header dictionary" p = re.compile(COMMENT, re.DOTALL) matches = p.findall(origText) if len(matches) < 1: # No header found return origText m = matches[0] # First comment is the header self._headerstr = m text = self._strip(WINCR, m) # Strip carriage return headertext = self._strip(STAR, text) # Strip stars # Extract sections from headertext (hide them?) info = self._sectionText(INFO_SEC, headertext) desc = self._sectionText(DESC_SEC, headertext) param = self._sectionText(PARAM_SEC, headertext) self._parseCompName(headertext) self._parseInfoSection(info) self._parseDescSection(desc) self._parseParamSection(param) # Find end position of the header end = self._headerEnd(origText) return origText[end:] def _parseBody(self, bodyText): "Parses body and populates body dictionary" bodytext = self._cleanupText(bodyText) self._parseDefComp(bodytext) self._parseDefParams(bodytext) self._parseSetParams(bodytext) self._parseOutParams(bodytext) self._parseStateParams(bodytext) self._parsePolParams(bodytext) self._parseBodySections(bodytext) def _cleanupText(self, text): "Cleans up text" temptext = self._strip(WINCR, text) # Strip carriage return temptext = self._strip(COMMENT, temptext) # Strip C comments (/*...*/) # Don't strip C++ comments as it make Jiao's hack die :) #temptext = self._strip(CPP_COMMENT, temptext)# Strip C++ comments (//...) return temptext def _parseDefComp(self, text): "Parses Define Component" name = "" value = self._defValues(DEF_COMP, text) if value: name = value self._defs["name"] = name def _parseDefParams(self, text): "Parses and sets arameters" self._setDefParams(DEF_PARAMS, text, "definition_parameters") def _setDefParams(self, regex, text, paramname): "Parses and parameters and" params = [] value = self._defValues(regex, text) if value: params = self._defParams(value) self._defs[paramname] = params def _defParams(self, line): "Returns definition parameters as dictionary" # Format: [<type>]{spaces}<variable>{spaces}[={spaces}<value>] # Example: line = "string XX, string YY =1, ZZ , WW= 2" params = [] items = line.strip(" ()\n").split(",") for it in items: var = it.strip() # Doesn't work well #match = self._defValues(PARAM_VAR, var, None) match = self._paramMatch(var) assert len(match) == 3 if match[1] == "": # If name is empty, return empty list return [] param = {} param["type"] = match[0] param["name"] = match[1] param["value"] = match[2] params.append(param) return params def _paramMatch(self, var): """ Returns tuple: (<type>, <name>, <value>). Example: ("string", "filename", "'IE.dat'") """ type = "" name = "" value = "" if not var: return (type, name, value) parts = var.split("=") if len(parts) == 2: value = parts[1].strip() # Get value if it exists value = value.strip("\"'") # Strip quotation marks? # Catching pointer variable parts2 = parts[0].split("*") if len(parts2) == 2: type = "%s *" % parts2[0].strip() name = parts2[1].strip() return (type, name, value) # Catching non-pointer variable varparts = parts[0].split() if len(varparts) == 2: type = varparts[0].strip() name = varparts[1].strip() elif len(varparts) == 1: name = varparts[0].strip() return (type, name, value) def _parseSetParams(self, text): "Parses Setting Parameters" self._setDefParams(SET_PARAMS, text, "setting_parameters") def _parseOutParams(self, text): "Parses Output Parameters" self._setDefParams(OUT_PARAMS, text, "output_parameters") def _parseStateParams(self, text): "Parses State Parameters" self._setListParams(STATE_PARAMS, text, "state_parameters") def _parsePolParams(self, text): "Parses Polarization Parameters" self._setListParams(POL_PARAMS, text, "polarization_parameters") def _setListParams(self, regex, text, paramname): "Parses text and populates list parameters in defintion part" params = [] value = self._defValues(regex, text) if value: items = value.strip(" ()").split( ",") # Strip brackets just in case for it in items: # clean up params params.append(it.strip()) self._defs[paramname] = params def _defValues(self, regex, text, flags=re.DOTALL | re.IGNORECASE | re.MULTILINE): "Returns matches for regex pattern. Used mostly for definitions" p = re.compile(regex) if flags: p = re.compile(regex, flags) matches = p.findall(text) if len(matches) < 1: # No value found return None m = matches[0] if type(m) is str: return m.strip() # If value is string, strip spaces return m # otherwise return as they are def _parseBodySections(self, text): "Parse body sections" for secname in BODY_SECTIONS: p = re.compile(sectionRegex(secname), re.DOTALL | re.IGNORECASE) matches = p.findall(text) secname = secname.lower() # Turn section name lower case if secname == FINALLY.lower( ): # Special case for "FINALLY" section secname = "finalize" if len(matches) < 1: # No section found self._sections[secname] = "" continue mm = matches[0] if len(mm) != 2: # Section content is empty self._sections[secname] = "" continue self._sections[secname] = mm[1] def _configText(self): "Take config from file if it exist and readable, or use from config - otherwise" configText = "" if self._fileExists(): try: # Try to read it configText = open(self._filename).read() except: pass # No exception return configText if self._config: configText = self._config return configText # Empty string def _fileExists(self): "Checks if file exists" if self._filename and os.path.exists(self._filename): return True return False def _strip(self, regex, text): "Strips piece of text that matches regex pattern" p = re.compile(regex, re.DOTALL | re.MULTILINE) s = re.sub(p, '', text) return s def _parseCompName(self, text): p = re.compile(COMP_NAME, re.IGNORECASE) namefinds = p.findall(text) if not namefinds: return "" # Empty name compname = namefinds[0].strip() self._header["componentname"] = compname # XXX: integrate with _defValue() def _sectionText(self, secregex, text, flags=re.DOTALL): "Returns section string that matches secregex pattern" p = re.compile(secregex) if flags: p = re.compile(secregex, flags) matches = p.findall(text) if len(matches) < 1: # No section found, return empty string return "" return matches[0] # Return the first found match # XXX: Merge with _populateParams() def _parseInfoSection(self, text): "Parses info section and populates part of header parameters" # XXX: There might be problems that description has ':' character # In this case check if numbr == 2 and afterparam = True lines = text.split("\n") for l in lines: l = l.strip() if l == '': continue # Skip empty line p = re.compile(PARAM) m = p.match(l) if m: param = m.group(1).strip() value = m.group(2).strip() paramname = self._paramName(param) if not paramname: continue paramname = paramname.lower() self._header[paramname] = value else: self._header["simple_description"] = l def _paramName(self, param): """ Returns parameter name. Note: Only those parameter which are in INFO_PARAMS will be returned """ # Non standard parameter if self._isMatch(COPYRIGHT_N, param): return "copyright" # Standard parameters for regex in STD_PARAMS: if self._isMatch(regex, param): return param return None def _isMatch(self, regex, text): "Returns True if matches, False - otherwise" p = re.compile(regex, re.IGNORECASE) m = p.match(text) if m: return True # There is the match return False def _parseDescSection(self, text): "Parses description section and populates part of header parameters" # Find example p = re.compile(EXAMPLE, re.DOTALL | re.IGNORECASE) matches = p.findall(text) example = "" # Default value if len(matches) >= 1: # No section found, return empty string mstr = matches[0] # Take first match! if mstr: example = " ".join(mstr.strip(" \n").split("\n")) self._header["example"] = example # Get full description: strip example and take whatever is left text = self._strip(EXAMPLE, text) self._header["full_description"] = text.strip() def _parseParamSection(self, text): "Parses parameter section and populates input and output parameters of header" # Get output parameters first! outputtext = self._sectionText(OUTPUT_PARAMS, text, flags=re.DOTALL | re.IGNORECASE) filteredtext = self._strip(OUTPUT_PARAMS, text) # ... and then input parameters inputtext = self._sectionText(INPUT_PARAMS, filteredtext, flags=re.DOTALL | re.IGNORECASE) self._parseInputSubsection(inputtext) self._parseOutputSubsection(outputtext) def _parseInputSubsection(self, text): "Parses input text and populates input parameters" self._inputparams = self._populateParams(IOPARAM, text) self._header["input_parameters"] = self._inputparams def _parseOutputSubsection(self, text): "Parses output text and populates output parameters" self._outputparams = self._populateParams(IOPARAM, text) self._header["output_parameters"] = self._outputparams def _populateParams(self, paramregex, text): "Populates dictionary of parameters" params = {} lines = text.split("\n") for l in lines: l = l.strip() if l == '': continue # Skip empty line p = re.compile(paramregex) m = p.match(l) if m: (param, value) = (m.group(1).strip(), m.group(2).strip()) # XXX: What if value has '\n'? if not param: continue params[param] = value return params def _headerEnd(self, origText): "Returns end position of the header" p = re.compile(COMMENT, re.DOTALL) ss = p.search(origText) return ss.end()