示例#1
0
 def parse(self, inputFilename, outputFilename):
     """Parse an ICR text file and dump the collected records as JSON.

     The input is read line by line.  A line is dispatched, in this order,
     to: the ignore filter, the start-of-record handler, the keyword-field
     handlers (sub-file or plain field), or ``_appendWordsFieldLine`` for
     the field that is currently open.  After the last line the pending
     record is appended to ``self._outObject``, which is then written to
     ``outputFilename`` with ``json.dump``.

     :param inputFilename: path of the ICR text file to read.
     :param outputFilename: path of the JSON file to write.
     """
     with open(inputFilename, 'r') as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = START_OF_RECORD.match(line)
             if match:
                 self._startOfNewItem(match, line)
                 continue
             # DBA_COMMENTS is only tried when the generic pattern fails.
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 match = DBA_COMMENTS.match(line)
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug('field name is: %s', fieldName)
                     logger.debug('cur field is: %s', self._curField)
                     # While a word-processing field is open, some keywords
                     # are treated as text of that field, not as new fields.
                     if (isWordProcessingField(self._curField)
                             and self._ignoreKeywordInWordProcessingFields(
                                 fieldName)):
                         self._appendWordsFieldLine(line)
                         continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(
                         self._curField):
                     # Logger.warn is a deprecated alias of Logger.warning.
                     logger.warning(
                         'Ignore blank line for current field: [%s]',
                         self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             elif self._curRecord and line.strip():
                 # Parenthesised single-argument form prints the same text
                 # under both the print statement and the print function.
                 print('No field associated with line %s: %s ' % (
                     self._curLineNo, line))
     logger.info('End of file now')
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info('Add last record: %s', self._curRecord)
         self._outObject.append(self._curRecord)
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#2
0
文件: ICRParser.py 项目: OSEHRA/VistA
 def parse(self, inputFilename, outputFilename):
     """Parse an ICR text file and dump the collected records as JSON.

     Every input line is checked, in order, against the ignore filter, the
     start-of-record pattern and the keyword-field patterns; anything else
     is passed to ``_appendWordsFieldLine`` for the currently open field.
     When the input is exhausted the pending record is appended to
     ``self._outObject`` and the list is written to ``outputFilename``.

     :param inputFilename: path of the ICR text file to read.
     :param outputFilename: path of the JSON file to write.
     """
     with open(inputFilename, "r") as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = START_OF_RECORD.match(line)
             if match:
                 self._startOfNewItem(match, line)
                 continue
             # DBA_COMMENTS is only tried when the generic pattern fails.
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 match = DBA_COMMENTS.match(line)
             if match and match.group("name") in ICR_FILE_KEYWORDS:
                 fieldName = match.group("name")
                 if isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug("field name is: %s", fieldName)
                     logger.debug("cur field is: %s", self._curField)
                     # While a word-processing field is open, some keywords
                     # are treated as text of that field, not as new fields.
                     if (isWordProcessingField(self._curField)
                             and self._ignoreKeywordInWordProcessingFields(
                                 fieldName)):
                         self._appendWordsFieldLine(line)
                         continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(
                         self._curField):
                     # Logger.warn is a deprecated alias of Logger.warning.
                     logger.warning(
                         "Ignore blank line for current field: [%s]",
                         self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             elif self._curRecord and line.strip():
                 # Parenthesised single-argument form prints the same text
                 # under both the print statement and the print function.
                 print("No field associated with line %s: %s " % (
                     self._curLineNo, line))
     logger.info("End of file now")
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info("Add last record: %s", self._curRecord)
         self._outObject.append(self._curRecord)
     with open(outputFilename, "w") as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#3
0
    def _findKeyValueInLine(self, match, line):
        """Store the name/value pair(s) found in ``line`` in self._curRecord.

        ``match`` is the regex match for the leading field name (its
        ``name`` group); the text after it is scanned with
        GENERIC_FIELD_RECORD for further fields, keeping only names listed
        in ICR_FILE_KEYWORDS.  May also update ``self._curField``.  Values
        of fields present in DATE_TIME_FIELD are finally passed through
        ``self._convertDateTimeField``.
        """
        name = match.group('name')
        # this is the name part
        """ add logic to ignore some of the field """

        # now find if there is any other name value pair in the same line
        restOfLine = line[match.end():]
        # allFlds collects every keyword name seen on this line; it is only
        # used for the date/time conversion at the end.
        allFlds = []
        if name in ICR_FILE_KEYWORDS:
            allFlds = [name]
        allmatches = []
        for m in GENERIC_FIELD_RECORD.finditer(restOfLine):
            if m.group('name') in ICR_FILE_KEYWORDS:  # ignore non-keyword
                allmatches.append(m)
                allFlds.append(m.group('name'))
        if allmatches:
            changeField = False
            for idx, rm in enumerate(allmatches):
                if idx == 0 and name in ICR_FILE_KEYWORDS:
                    # The leading field's value is the text up to the first
                    # additional keyword.
                    val = restOfLine[:rm.start()].strip()
                    self._curRecord[name] = val
                    # An empty DESCRIPTION keeps the current field unchanged.
                    changeField = not (name == 'DESCRIPTION' and val == "")
                if idx == len(allmatches) - 1:
                    # Last keyword on the line: its value runs to end of line.
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(
                                rm.group('name')):
                            self._appendWordsFieldLine(restOfLine)
                    elif changeField:
                        self._curField = rm.group('name')
                        self._curRecord[
                            self._curField] = restOfLine[rm.end():].strip()
                    else:
                        self._curRecord[rm.group(
                            'name')] = restOfLine[rm.end():].strip()
                else:
                    # NOTE(review): this branch stores the value of the
                    # *previous* match.  At idx == 0 the index idx - 1 is -1,
                    # i.e. the last match, and the text between the
                    # next-to-last and the last match is never stored by
                    # this loop.  Looks like an off-by-one -- confirm intent.
                    if changeField:
                        self._curField = allmatches[idx - 1].group('name')
                        self._curRecord[self._curField] = restOfLine[
                            allmatches[idx - 1].end():rm.start()].strip()
                    else:
                        self._curRecord[allmatches[idx - 1].group(
                            'name')] = restOfLine[allmatches[idx - 1].end():rm.
                                                  start()].strip()
        else:
            # No further keyword on the line: everything after the name is
            # the value.  GENERAL DESCRIPTION is stored as a one-item list.
            if name == 'GENERAL DESCRIPTION':
                self._curRecord[name] = [line[match.end():].strip()]
            else:
                self._curRecord[name] = line[match.end():].strip()

        # Convert the date/time fields that were seen on this line.
        dtFields = set(allFlds) & DATE_TIME_FIELD
        for fld in dtFields:
            self._curRecord[fld] = self._convertDateTimeField(
                self._curRecord[fld])
示例#4
0
def _convertIndividualFieldValue(field, icrEntry, value, crossRef):
    """Return ``value`` converted for output according to ``field``.

    * Word-processing fields: a list is joined with newlines, the text is
      escaped with ``cgi.escape`` and wrapped in a ``<pre>`` block.
    * Fields with an entry in FIELD_CONVERT_MAP: the mapped converter is
      called with ``(value, icrEntry, crossRef=crossRef)``; list values are
      returned untouched.
    * Any other field: ``value`` is returned unchanged.
    """
    valueIsList = type(value) is list
    if isWordProcessingField(field):
        rawText = "\n".join(value) if valueIsList else value
        return '<pre>\n' + cgi.escape(rawText) + '\n</pre>\n'
    # Nothing to convert for unmapped fields or for list values.
    if field not in FIELD_CONVERT_MAP or valueIsList:
        return value
    converter = FIELD_CONVERT_MAP[field]
    return converter(value, icrEntry, crossRef=crossRef)
示例#5
0
def _convertIndividualFieldValue(field, icrEntry, value, crossRef):
    """Return ``value`` converted for output according to ``field``.

    * Word-processing fields: a list is joined with newlines, the text is
      escaped with ``cgi.escape`` and wrapped in a ``<pre>`` block.
    * Fields with an entry in FIELD_CONVERT_MAP: the mapped converter is
      called with ``(value, icrEntry, crossRef=crossRef)``; list values are
      returned untouched.
    * Any other field: ``value`` is returned unchanged.
    """
    valueIsList = type(value) is list
    if isWordProcessingField(field):
        rawText = "\n".join(value) if valueIsList else value
        return '<pre>\n' + cgi.escape(rawText) + '\n</pre>\n'
    # Nothing to convert for unmapped fields or for list values.
    if field not in FIELD_CONVERT_MAP or valueIsList:
        return value
    converter = FIELD_CONVERT_MAP[field]
    return converter(value, icrEntry, crossRef=crossRef)
示例#6
0
 def _convertIndividualFieldValue(self, field, icrEntry, value):
     """Return ``value`` converted for output according to ``field``.

     * Word-processing fields: a list is joined with newlines, the text is
       escaped with ``cgi.escape`` and wrapped in a ``<pre>`` block.
     * Fields with an entry in ``field_convert_map``: the mapped converter
       is called with ``(value, icrEntry, crossRef=self._crossRef)``; a
       list value is logged as a warning and returned untouched.
     * Any other field: ``value`` is returned unchanged.
     """
     valueIsList = type(value) is list
     if isWordProcessingField(field):
         rawText = "\n".join(value) if valueIsList else value
         return '<pre>\n' + cgi.escape(rawText) + '\n</pre>\n'
     if field not in field_convert_map:
         return value
     if valueIsList:
         # Logger.warn is a deprecated alias; warning() is the supported name.
         logger.warning('field: [%s], value:[%s], icrEntry: [%s]',
                        field, value, icrEntry)
         return value
     return field_convert_map[field](value, icrEntry, crossRef=self._crossRef)
示例#7
0
 def _convertIndividualFieldValue(self, field, icrEntry, value):
     """Return ``value`` converted for output according to ``field``.

     * Word-processing fields: a list is joined with newlines, the text is
       escaped with ``cgi.escape`` and wrapped in a ``<pre>`` block.
     * Fields with an entry in ``field_convert_map``: the mapped converter
       is called with ``(value, icrEntry, crossRef=self._crossRef)``; a
       list value is logged as a warning and returned untouched.
     * Any other field: ``value`` is returned unchanged.
     """
     valueIsList = type(value) is list
     if isWordProcessingField(field):
         rawText = "\n".join(value) if valueIsList else value
         return '<pre>\n' + cgi.escape(rawText) + '\n</pre>\n'
     if field not in field_convert_map:
         return value
     if valueIsList:
         # Logger.warn is a deprecated alias; warning() is the supported name.
         logger.warning('field: [%s], value:[%s], icrEntry: [%s]',
                        field, value, icrEntry)
         return value
     return field_convert_map[field](value, icrEntry, crossRef=self._crossRef)
示例#8
0
    def _findKeyValueInLine(self, match, line):
        """Store the name/value pair(s) found in ``line`` in self._curRecord.

        ``match`` is the regex match for the leading field name (its
        ``name`` group); the text after it is scanned with
        GENERIC_FIELD_RECORD for further fields, keeping only names listed
        in ICR_FILE_KEYWORDS.  May also update ``self._curField``.  Values
        of fields present in DATE_TIME_FIELD are finally passed through
        ``self._convertDateTimeField``.
        """
        name = match.group('name'); # this is the name part
        """ add logic to ignore some of the field """

        # now find if there is any other name value pair in the same line
        restOfLine = line[match.end():]
        # allFlds collects every keyword name seen on this line; it is only
        # used for the date/time conversion at the end.
        allFlds = []
        if name in ICR_FILE_KEYWORDS:
            allFlds = [name]
        allmatches = []
        for m in GENERIC_FIELD_RECORD.finditer(restOfLine):
            if m.group('name') in ICR_FILE_KEYWORDS: # ignore non-keyword
                allmatches.append(m)
                allFlds.append(m.group('name'))
        if allmatches:
            changeField = False
            for idx, rm in enumerate(allmatches):
                if idx == 0 and name in ICR_FILE_KEYWORDS:
                    # The leading field's value is the text up to the first
                    # additional keyword.
                    val = restOfLine[:rm.start()].strip()
                    self._curRecord[name] = val
                    # An empty DESCRIPTION keeps the current field unchanged.
                    changeField = not(name == 'DESCRIPTION' and val == "")
                if idx == len(allmatches) -1:
                    # Last keyword on the line: its value runs to end of line.
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(rm.group('name')):
                            self._appendWordsFieldLine(restOfLine)
                    elif changeField:
                        self._curField = rm.group('name')
                        self._curRecord[self._curField] = restOfLine[rm.end():].strip()
                    else:
                        self._curRecord[rm.group('name')] = restOfLine[rm.end():].strip()
                else:
                    # NOTE(review): this branch stores the value of the
                    # *previous* match.  At idx == 0 the index idx-1 is -1,
                    # i.e. the last match, and the text between the
                    # next-to-last and the last match is never stored by
                    # this loop.  Looks like an off-by-one -- confirm intent.
                    if changeField:
                        self._curField = allmatches[idx-1].group('name')
                        self._curRecord[self._curField] = restOfLine[allmatches[idx-1].end():rm.start()].strip()
                    else:
                        self._curRecord[allmatches[idx-1].group('name')] = restOfLine[allmatches[idx-1].end():rm.start()].strip()
        else:
            # No further keyword on the line: everything after the name is
            # the value.  GENERAL DESCRIPTION is stored as a one-item list.
            if name == 'GENERAL DESCRIPTION':
                self._curRecord[name] = [line[match.end():].strip()]
            else:
                self._curRecord[name] = line[match.end():].strip()

        # Convert the date/time fields that were seen on this line.
        dtFields = set(allFlds) & DATE_TIME_FIELD
        for fld in dtFields:
            self._curRecord[fld] = self._convertDateTimeField(self._curRecord[fld])
示例#9
0
 def _convertIndividualFieldValuePDF(self, field, value, writeField=False,
                                     keepTogether=True):
     """Turn a field value into Paragraph object(s) for the PDF output.

     Text of word-processing fields is escaped with ``cgi.escape``; other
     text is used as is.  With ``writeField`` each text is prefixed with
     ``"<field> : "``.  A non-list value yields one ``Paragraph``.  A list
     value yields one ``Paragraph`` per item, wrapped in ``KeepTogether``
     when ``keepTogether`` is true and returned as a plain list otherwise.
     """
     needsEscape = isWordProcessingField(field)

     def _toParagraph(raw):
         # Build the paragraph for a single piece of text.
         text = cgi.escape(raw) if needsEscape else raw
         if writeField:
             text = "%s : %s" % (field, text)
         # TODO: "Field:" should not be styled as 'Code'
         return Paragraph(text, styles['Normal'])

     if type(value) is not list:
         return _toParagraph(value)
     cell = [_toParagraph(item) for item in value]
     if keepTogether:
         return KeepTogether(cell)
     return cell
示例#10
0
def _convertIndividualFieldValuePDF(field, value, writeField=False,
                                    keepTogether=True):
    """Turn a field value into Paragraph object(s) for the PDF output.

    Text of word-processing fields is escaped with ``cgi.escape``; other
    text is used as is.  With ``writeField`` each text is prefixed with
    ``"<field> : "``.  A non-list value yields one ``Paragraph``.  A list
    value yields one ``Paragraph`` per item, wrapped in ``KeepTogether``
    when ``keepTogether`` is true and returned as a plain list otherwise.
    """
    needsEscape = isWordProcessingField(field)

    def _toParagraph(raw):
        # Build the paragraph for a single piece of text.
        text = cgi.escape(raw) if needsEscape else raw
        if writeField:
            text = "%s : %s" % (field, text)
        # TODO: "Field:" should not be styled as 'Code'
        return Paragraph(text, STYLES['Normal'])

    if type(value) is not list:
        return _toParagraph(value)
    cell = [_toParagraph(item) for item in value]
    if keepTogether:
        return KeepTogether(cell)
    return cell
示例#11
0
    def parse(self, inputFilename, outputFilename):
        """Parse an ICR text file and dump the collected records as JSON.

        The input is read line by line.  Four local flags track whether the
        parser is inside one of the free text fields ('DBA Comments',
        'GENERAL DESCRIPTION', 'SUBSCRIBING DETAILS', 'COMPONENT
        DESCRIPTION'); while one is set, lines that look like fields are
        appended to that free text field instead of starting a new field.
        At end of input the pending record is appended to
        ``self._outObject`` and the list is written to ``outputFilename``;
        the output directory is created first when it does not exist.

        :param inputFilename: path of the ICR text file to read.
        :param outputFilename: path of the JSON file to write.
        """
        with open(inputFilename, 'r') as ICRFile:
            curLineNo = 0
            curNumber = None
            # Free text fields may contain field names and
            # need special parsing rules
            DBAComments = False
            generalDescription = False
            subscribingDetails = False
            componentDescription = False

            for line in ICRFile:
                line = line.rstrip("\r\n")
                curLineNo += 1
                # get rid of lines that are ignored
                if self.isIgnoredLine(line):
                    continue
                match = INTEGRATION_REFERENCES_LIST.match(line)
                if match:
                    # Skip this line. Use getDate() to parse date
                    continue
                match = START_OF_RECORD.match(line)
                if match:
                    name = match.group('name')
                    number = match.group('number')
                    skipField = False
                    isFreeTextField = DBAComments or generalDescription or \
                        subscribingDetails or componentDescription
                    if isFreeTextField:
                        # Check if the number matches what
                        # we're currently processing
                        skipField = number == curNumber
                    if not skipField:
                        curNumber = number

                        DBAComments = False
                        generalDescription = False
                        subscribingDetails = False
                        componentDescription = False

                        self._startOfNewItem(name, number, match, line)
                        continue
                    # Otherwise fall through: the line is handled like any
                    # other line of the open free text field.

                match = GENERIC_START_OF_RECORD.search(line)
                if not match:
                    # DBA Comments doesn't match regex for other fields,
                    # check separately. Even if we get a match here, can't
                    # assume that we're in a DBA Comments field, might be in
                    # a different free text field
                    match = DBA_COMMENTS.match(line)

                if match and match.group('name') in ICR_FILE_KEYWORDS:
                    fieldName = match.group('name')

                    # First check if we are at the end of a free text field
                    if DBAComments:
                        if fieldName in [
                                'DATE/TIME EDITED', 'NUMBER', 'DATE ACTIVATED'
                        ]:
                            DBAComments = False
                    elif generalDescription:
                        # Starts with exactly 2 spaces
                        if line.startswith(
                                "  STATUS:") or fieldName == 'VIEWER':
                            generalDescription = False
                    elif subscribingDetails:
                        # This assumes that 'Subscribing Details' may start
                        # with a field name or may contain 'GLOBAL REFERENCE'
                        # but won't contain any other field names in the middle
                        if fieldName in ICR_FILE_KEYWORDS and \
                          fieldName != 'GLOBAL REFERENCE' and \
                          'SUBSCRIBING DETAILS' in self._curRecord:
                            subscribingDetails = False
                    elif componentDescription:
                        # At most one space before 'VARIABLES:'
                        if line.startswith("VARIABLES:") or \
                          line.startswith(" VARIABLES:") or \
                          fieldName in ['COMPONENT/ENTRY POINT', 'SUBSCRIBING PACKAGE']:
                            componentDescription = False

                    # Are we at the beginning of a free text field?
                    if DBAComments or generalDescription or \
                      subscribingDetails or componentDescription:
                        # Free text fields are never nested
                        pass
                    elif fieldName == 'DBA Comments':
                        DBAComments = True
                    elif fieldName == 'GENERAL DESCRIPTION':
                        generalDescription = True
                    elif fieldName == 'SUBSCRIBING DETAILS':
                        subscribingDetails = True
                    elif fieldName == 'COMPONENT DESCRIPTION':
                        componentDescription = True

                    # Process line
                    # Start with free text fields
                    if DBAComments:
                        fieldName = 'DBA Comments'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            self._curField = fieldName
                            self._rewindStack()
                            name = match.group('name')  # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif generalDescription:
                        fieldName = 'GENERAL DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process general description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)
                    elif subscribingDetails:
                        fieldName = 'SUBSCRIBING DETAILS'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            self._curField = fieldName
                            name = match.group('name')  # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif componentDescription:
                        fieldName = 'COMPONENT DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process component description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)

                    elif isSubFile(fieldName):
                        self._curField = fieldName
                        self._startOfSubFile(match, line)
                    else:
                        # Check to see if fieldName is already in the out list
                        if isWordProcessingField(self._curField):
                            if self._ignoreKeywordInWordProcessingFields(
                                    fieldName):
                                self._appendWordsFieldLine(line)
                                continue
                        # figure out where to store the record
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif self._curField and self._curField in self._curRecord:
                    if not line.strip() and not isWordProcessingField(
                            self._curField):
                        # Ignore blank line
                        continue
                    self._appendWordsFieldLine(line)
                else:
                    if self._curRecord:
                        if not line.strip():
                            continue
                        # Let the logger do the formatting (same message).
                        logger.error('No field associated with line %s: %s ',
                                     curLineNo, line)
        # TODO: Copy + paste from '_startOfNewItem()'
        self._curField = None
        self._rewindStack()
        if self._curRecord:
            self._outObject.append(self._curRecord)
        outputDir = os.path.dirname(outputFilename)
        # dirname() is '' for a bare file name; os.makedirs('') would raise,
        # so only create a directory when there is one to create.
        if outputDir and not os.path.exists(outputDir):
            # Will also create intermediate directories if needed
            os.makedirs(outputDir)
        with open(outputFilename, 'w') as out_file:
            json.dump(self._outObject, out_file, indent=4)
示例#12
0
 def parse(self, inputFilename, outputFilename):
     """Parse an ICR text file and dump the collected records as JSON.

     The input is read line by line.  ``self._DBAComments`` and
     ``self._generalDescription`` flag that a free text field is open; while
     either is set, a start-of-record line does not start a new record.
     The text captured from the INTEGRATION_REFERENCES_LIST line is stored
     in the module-level global ``date``.  At end of input the pending
     record is appended to ``self._outObject`` and the list is written to
     ``outputFilename``.

     :param inputFilename: path of the ICR text file to read.
     :param outputFilename: path of the JSON file to write.
     """
     global date
     with open(inputFilename, 'r') as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = INTEGRATION_REFERENCES_LIST.match(line)
             if match:
                 date = match.group(1).strip()
                 continue
             match = START_OF_RECORD.match(line)
             if match and not self._DBAComments and not self._generalDescription:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 match = DBA_COMMENTS.match(line)
                 if match:
                     self._DBAComments = True
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if fieldName == 'DBA Comments':
                     self._DBAComments = True
                 elif fieldName == 'GENERAL DESCRIPTION':
                     self._generalDescription = True
                 # NOTE(review): fieldName was already checked against
                 # ICR_FILE_KEYWORDS by the enclosing `if`, so the flag is
                 # always cleared here and the `if self._DBAComments:`
                 # branch below appears unreachable -- confirm intent.
                 if self._DBAComments:
                     if fieldName in ICR_FILE_KEYWORDS:
                         self._DBAComments = False
                 elif self._generalDescription:
                     if line.startswith("  STATUS:"):  # Starts with exactly 2 spaces
                         self._generalDescription = False
                 if self._DBAComments:
                     fieldName = 'DBA Comments'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name')  # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif self._generalDescription:
                     fieldName = 'GENERAL DESCRIPTION'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name')  # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug('field name is: %s', fieldName)
                     logger.debug('cur field is: %s', self._curField)
                     # While a word-processing field is open, some keywords
                     # are treated as text of that field, not as new fields.
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(self._curField):
                     # Logger.warn is a deprecated alias of Logger.warning.
                     logger.warning('Ignore blank line for current field: [%s]',
                                    self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             else:
                 if self._curRecord:
                     if not line.strip():
                         continue
                     # Parenthesised single-argument form prints the same
                     # text under the print statement and print function.
                     print('No field associated with line %s: %s ' % (
                         self._curLineNo, line))
     logger.info('End of file now')
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info('Add last record: %s', self._curRecord)
         self._outObject.append(self._curRecord)
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#13
0
 def parse(self, inputFilename, outputFilename):
     """Parse an ICR text file and dump the collected records as JSON.

     The input is read line by line.  ``self._DBAComments`` and
     ``self._generalDescription`` flag that a free text field is open; while
     either is set, a start-of-record line does not start a new record.
     The text captured from the INTEGRATION_REFERENCES_LIST line is stored
     in the module-level global ``date``.  At end of input the pending
     record is appended to ``self._outObject`` and the list is written to
     ``outputFilename``.

     :param inputFilename: path of the ICR text file to read.
     :param outputFilename: path of the JSON file to write.
     """
     global date
     with open(inputFilename, 'r') as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = INTEGRATION_REFERENCES_LIST.match(line)
             if match:
                 date = match.group(1).strip()
                 continue
             match = START_OF_RECORD.match(line)
             if match and not self._DBAComments and not self._generalDescription:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 match = DBA_COMMENTS.match(line)
                 if match:
                     self._DBAComments = True
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if fieldName == 'DBA Comments':
                     self._DBAComments = True
                 elif fieldName == 'GENERAL DESCRIPTION':
                     self._generalDescription = True
                 # NOTE(review): fieldName was already checked against
                 # ICR_FILE_KEYWORDS by the enclosing `if`, so the flag is
                 # always cleared here and the `if self._DBAComments:`
                 # branch below appears unreachable -- confirm intent.
                 if self._DBAComments:
                     if fieldName in ICR_FILE_KEYWORDS:
                         self._DBAComments = False
                 elif self._generalDescription:
                     if line.startswith("  STATUS:"):  # Starts with exactly 2 spaces
                         self._generalDescription = False
                 if self._DBAComments:
                     fieldName = 'DBA Comments'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name')  # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif self._generalDescription:
                     fieldName = 'GENERAL DESCRIPTION'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         # Starting to process general description
                         self._curField = fieldName
                         self._rewindStack()
                         self._findKeyValueInLine(match, line, self._curRecord)
                 elif isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug('field name is: %s', fieldName)
                     logger.debug('cur field is: %s', self._curField)
                     # While a word-processing field is open, some keywords
                     # are treated as text of that field, not as new fields.
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(self._curField):
                     # Logger.warn is a deprecated alias of Logger.warning.
                     logger.warning('Ignore blank line for current field: [%s]',
                                    self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             else:
                 if self._curRecord:
                     if not line.strip():
                         continue
                     # Parenthesised single-argument form prints the same
                     # text under the print statement and print function.
                     print('No field associated with line %s: %s ' % (
                         self._curLineNo, line))
     logger.info('End of file now')
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info('Add last record: %s', self._curRecord)
         self._outObject.append(self._curRecord)
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#14
0
 def parse(self, inputFilename, outputFilename):
     """Parse the text file inputFilename and dump the records as JSON.

     Every line is dispatched on the first pattern it matches: ignored
     lines and INTEGRATION_REFERENCES_LIST lines are skipped, a
     START_OF_RECORD match opens a new item, and a field-name match whose
     name is in ICR_FILE_KEYWORDS starts a field or a sub-file.  Any other
     line is appended to the current field when that field already exists
     in the current record.

     Parameters:
         inputFilename:  path of the text file to read.
         outputFilename: path of the JSON file to write (indent=4).
                         Missing parent directories are created; a bare
                         file name without a directory part is accepted.
     """
     with open(inputFilename, 'r') as ICRFile:
         curLineNo = 0
         # While one of these flags is set, a START_OF_RECORD match does
         # not open a new item (see the check below).
         DBAComments = False
         generalDescription = False
         for line in ICRFile:
             line = line.rstrip("\r\n")
             curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             if INTEGRATION_REFERENCES_LIST.match(line):
                 # Skip this line. Use getDate() to parse date
                 continue
             match = START_OF_RECORD.match(line)
             if match and not DBAComments and not generalDescription:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 # Fall back to the dedicated DBA Comments pattern
                 match = DBA_COMMENTS.match(line)
                 if match:
                     DBAComments = True
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if fieldName == 'DBA Comments':
                     DBAComments = True
                 elif fieldName == 'GENERAL DESCRIPTION':
                     generalDescription = True
                 if DBAComments:
                     # NOTE(review): fieldName was already checked against
                     # ICR_FILE_KEYWORDS by the enclosing condition, so this
                     # test always succeeds and the flag is cleared on every
                     # keyword line, which makes the 'DBA Comments' branch
                     # below unreachable -- confirm whether that is intended.
                     if fieldName in ICR_FILE_KEYWORDS:
                         DBAComments = False
                 elif generalDescription:
                     # Starts with exactly 2 spaces
                     if line.startswith("  STATUS:"):
                         generalDescription = False
                 if DBAComments:
                     fieldName = 'DBA Comments'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name')  # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif generalDescription:
                     fieldName = 'GENERAL DESCRIPTION'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         # Starting to process general description
                         self._curField = fieldName
                         self._rewindStack()
                         self._findKeyValueInLine(match, line,
                                                  self._curRecord)
                 elif isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     # Check to see if fieldName is already in the out list
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(
                                 fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(
                         self._curField):
                     # Ignore blank line
                     continue
                 self._appendWordsFieldLine(line)
             elif self._curRecord and line.strip():
                 # Non-blank line that belongs to no known field
                 logger.debug('No field associated with line %s: %s ',
                              curLineNo, line)
     # Flush whatever is still pending at end of file
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         self._outObject.append(self._curRecord)
     outputDir = os.path.dirname(outputFilename)
     # dirname() is '' for a bare file name and os.makedirs('') raises,
     # so only create the directory when there is one to create.
     if outputDir and not os.path.exists(outputDir):
         # Will also create intermediate directories if needed
         os.makedirs(outputDir)
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#15
0
    def parse(self, inputFilename, outputFilename):
        """Parse the text file inputFilename and dump the records as JSON.

        Lines are read one at a time.  Four free text fields ('DBA
        Comments', 'GENERAL DESCRIPTION', 'SUBSCRIBING DETAILS' and
        'COMPONENT DESCRIPTION') are tracked with boolean flags because
        their content may itself look like a field name or the start of a
        record; while a flag is set, matching lines are appended to that
        field until one of the end conditions below clears the flag.

        Parameters:
            inputFilename:  path of the text file to read.
            outputFilename: path of the JSON file to write (indent=4).
                            Missing parent directories are created; a bare
                            file name without a directory part is accepted.
        """
        with open(inputFilename, 'r') as ICRFile:
            curLineNo = 0
            curNumber = None
            # Free text fields may contain field names and
            # need special parsing rules
            DBAComments = False
            generalDescription = False
            subscribingDetails = False
            componentDescription = False

            for line in ICRFile:
                line = line.rstrip("\r\n")
                curLineNo += 1
                # get rid of lines that are ignored
                if self.isIgnoredLine(line):
                    continue
                if INTEGRATION_REFERENCES_LIST.match(line):
                    # Skip this line. Use getDate() to parse date
                    continue
                match = START_OF_RECORD.match(line)
                if match:
                    name = match.group('name')
                    number = match.group('number')
                    skipField = False
                    isFreeTextField = DBAComments or generalDescription or \
                        subscribingDetails or componentDescription
                    if isFreeTextField:
                        # Check if the number matches what
                        # we're currently processing
                        skipField = number == curNumber
                    if not skipField:
                        curNumber = number

                        DBAComments = False
                        generalDescription = False
                        subscribingDetails = False
                        componentDescription = False

                        self._startOfNewItem(name, number, match, line)
                        continue
                    # Otherwise fall through: the line is handled as part
                    # of the record that is already being processed.

                match = GENERIC_START_OF_RECORD.search(line)
                if not match:
                    # DBA Comments doesn't match regex for other fields,
                    # check separately. Even if we get a match here, can't
                    # assume that we're in a DBA Comments field, might be in
                    # a different free text field
                    match = DBA_COMMENTS.match(line)

                if match and match.group('name') in ICR_FILE_KEYWORDS:
                    fieldName = match.group('name')

                    # First check if we are at the end of a free text field
                    if DBAComments:
                        if fieldName in ['DATE/TIME EDITED', 'NUMBER', 'DATE ACTIVATED']:
                            DBAComments = False
                    elif generalDescription:
                        # Starts with exactly 2 spaces
                        if line.startswith("  STATUS:") or fieldName == 'VIEWER':
                            generalDescription = False
                    elif subscribingDetails:
                        # This assumes that 'Subscribing Details' may start
                        # with a field name or may contain 'GLOBAL REFERENCE'
                        # but won't contain any other field names in the middle
                        if fieldName in ICR_FILE_KEYWORDS and \
                          fieldName != 'GLOBAL REFERENCE' and \
                          'SUBSCRIBING DETAILS' in self._curRecord:
                            subscribingDetails = False
                    elif componentDescription:
                        # At most one space before 'VARIABLES:'
                        if line.startswith(("VARIABLES:", " VARIABLES:")) or \
                          fieldName in ['COMPONENT/ENTRY POINT', 'SUBSCRIBING PACKAGE']:
                            componentDescription = False

                    # Are we at the beginning of a free text field?
                    if DBAComments or generalDescription or \
                      subscribingDetails or componentDescription:
                        # Free text fields are never nested
                        pass
                    elif fieldName == 'DBA Comments':
                        DBAComments = True
                    elif fieldName == 'GENERAL DESCRIPTION':
                        generalDescription = True
                    elif fieldName == 'SUBSCRIBING DETAILS':
                        subscribingDetails = True
                    elif fieldName == 'COMPONENT DESCRIPTION':
                        componentDescription = True

                    # Process line
                    # Start with free text fields
                    if DBAComments:
                        fieldName = 'DBA Comments'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            self._curField = fieldName
                            self._rewindStack()
                            name = match.group('name')  # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif generalDescription:
                        fieldName = 'GENERAL DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process general description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)
                    elif subscribingDetails:
                        fieldName = 'SUBSCRIBING DETAILS'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            self._curField = fieldName
                            name = match.group('name')  # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif componentDescription:
                        fieldName = 'COMPONENT DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process component description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)

                    elif isSubFile(fieldName):
                        self._curField = fieldName
                        self._startOfSubFile(match, line)
                    else:
                        # Check to see if fieldName is already in the out list
                        if isWordProcessingField(self._curField):
                            if self._ignoreKeywordInWordProcessingFields(fieldName):
                                self._appendWordsFieldLine(line)
                                continue
                        # figure out where to store the record
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif self._curField and self._curField in self._curRecord:
                    if not line.strip() and not isWordProcessingField(self._curField):
                        # Ignore blank line
                        continue
                    self._appendWordsFieldLine(line)
                elif self._curRecord and line.strip():
                    # Non-blank line that belongs to no known field
                    logger.error('No field associated with line %s: %s ',
                                 curLineNo, line)
        # TODO: Copy + paste from '_startOfNewItem()'
        self._curField = None
        self._rewindStack()
        if self._curRecord:
            self._outObject.append(self._curRecord)
        outputDir = os.path.dirname(outputFilename)
        # dirname() is '' for a bare file name and os.makedirs('') raises,
        # so only create the directory when there is one to create.
        if outputDir and not os.path.exists(outputDir):
            # Will also create intermediate directories if needed
            os.makedirs(outputDir)
        with open(outputFilename, 'w') as out_file:
            json.dump(self._outObject, out_file, indent=4)