示例#1
0
 def _icrSubFileToHtml(self, output, icrJson, subFile):
     """Write every entry of a subfile as an HTML list item.

     Each entry of ``icrJson`` becomes one ``<li>`` element. Fields are
     emitted in the order given by ``SUBFILE_FIELDS[subFile]``. A field
     that is itself a subfile (and is not ``subFile``) is rendered as a
     nested ``<ol>`` by calling this method again.

     :param output: object whose ``write`` method receives the HTML
     :param icrJson: iterable of entries belonging to the subfile
     :param subFile: name of the subfile being rendered
     """
     logger.debug('subFile is %s', subFile)
     # TODO: Is 'icrJson' the correct name for this variable?
     logger.debug('icrJson is %s', icrJson)
     columns = SUBFILE_FIELDS[subFile]
     if subFile not in columns:
         # The subfile name is added to the list held by SUBFILE_FIELDS
         columns.append(subFile)
     for entry in icrJson:
         output.write("<li>\n")
         for name in columns:
             if name not in entry:
                 # this entry does not have the field
                 continue
             raw = entry[name]
             logger.debug('current field is %s', name)
             # avoid recursive subfile for now
             if not isSubFile(name) or name == subFile:
                 text = self._convertIndividualFieldValue(name, entry, raw)
                 output.write("<dt>%s:  &nbsp;&nbsp;%s</dt>\n" % (name, text))
                 continue
             logger.debug('field is a subfile %s', name)
             output.write("<dl><dt>%s:</dt>\n" % name)
             output.write("<dd>\n")
             output.write("<ol>\n")
             self._icrSubFileToHtml(output, raw, name)
             output.write("</ol>\n")
             output.write("</dd></dl>\n")
         output.write("</li>\n")
示例#2
0
 def parse(self, inputFilename, outputFilename):
     """Parse ``inputFilename`` line by line and dump the records as JSON.

     Every line is stripped of trailing CR/LF and counted in
     ``self._curLineNo``. A line matching ``START_OF_RECORD`` starts a new
     item; a line carrying a keyword from ``ICR_FILE_KEYWORDS`` starts a
     field or a subfile; any other line is appended to the current field.
     After the last line the pending record is added to
     ``self._outObject``, which is then written to ``outputFilename``
     with an indent of 4.

     :param inputFilename: path of the text file to read
     :param outputFilename: path of the JSON file to write
     """
     with open(inputFilename, 'r') as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = START_OF_RECORD.match(line)
             if match:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 match = DBA_COMMENTS.match(line)
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug('field name is: %s', fieldName)
                     logger.debug('cur field is: %s', self._curField)
                     # Check to see if fieldName is already in the out list
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(
                                 fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(
                         self._curField):
                     logger.warning(
                         'Ignore blank line for current field: [%s]',
                         self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             else:
                 if self._curRecord:
                     if not line.strip():
                         continue
                     # Parenthesised single argument: prints the same text
                     # under Python 2 and is valid syntax under Python 3.
                     print('No field associated with line %s: %s ' % (
                         self._curLineNo, line))
     logger.info('End of file now')
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info('Add last record: %s', self._curRecord)
         self._outObject.append(self._curRecord)
     # pprint.pprint(self._outObject);
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#3
0
 def _icrSubFileToPDF(self, pdf, icrJson, subFile):
     """Append the converted fields of every subfile entry to ``pdf``.

     Fields are visited in the order given by ``SUBFILE_FIELDS[subFile]``.
     A field that is itself a subfile (and is not ``subFile``) is handled
     by calling this method again on its value.

     :param pdf: object whose ``append`` method receives converted values
     :param icrJson: iterable of entries belonging to the subfile
     :param subFile: name of the subfile being rendered
     """
     columns = SUBFILE_FIELDS[subFile]
     if subFile not in columns:
         # The subfile name is added to the list held by SUBFILE_FIELDS
         columns.append(subFile)
     for entry in icrJson:
         for name in columns:
             if name not in entry:
                 # this entry does not have the field
                 continue
             raw = entry[name]
             # avoid recursive subfile for now
             if isSubFile(name) and name != subFile:
                 self._icrSubFileToPDF(pdf, raw, name)
             else:
                 pdf.append(
                     self._convertIndividualFieldValuePDF(name, raw, True))
示例#4
0
def _icrSubFileToPDF(pdf, icrJson, subFile):
    """Append the converted fields of every subfile entry to ``pdf``.

    Fields are visited in the order given by ``SUBFILE_FIELDS[subFile]``.
    A field that is itself a subfile (and is not ``subFile``) is handled
    by calling this function again on its value.

    :param pdf: object whose ``append`` method receives converted values
    :param icrJson: iterable of entries belonging to the subfile
    :param subFile: name of the subfile being rendered
    """
    ordered_fields = SUBFILE_FIELDS[subFile]
    if subFile not in ordered_fields:
        # The subfile name is added to the list held by SUBFILE_FIELDS
        ordered_fields.append(subFile)
    for entry in icrJson:
        for key in ordered_fields:
            if key not in entry:
                # this entry does not have the field
                continue
            item = entry[key]
            # avoid recursive subfile for now
            if not isSubFile(key) or key == subFile:
                pdf.append(_convertIndividualFieldValuePDF(key, item, True))
                continue
            _icrSubFileToPDF(pdf, item, key)
示例#5
0
文件: ICRParser.py 项目: OSEHRA/VistA
 def parse(self, inputFilename, outputFilename):
     """Parse ``inputFilename`` line by line and dump the records as JSON.

     Every line is stripped of trailing CR/LF and counted in
     ``self._curLineNo``. A line matching ``START_OF_RECORD`` starts a new
     item; a line carrying a keyword from ``ICR_FILE_KEYWORDS`` starts a
     field or a subfile; any other line is appended to the current field.
     After the last line the pending record is added to
     ``self._outObject``, which is then written to ``outputFilename``
     with an indent of 4.

     :param inputFilename: path of the text file to read
     :param outputFilename: path of the JSON file to write
     """
     with open(inputFilename, "r") as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = START_OF_RECORD.match(line)
             if match:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 match = DBA_COMMENTS.match(line)
             if match and match.group("name") in ICR_FILE_KEYWORDS:
                 fieldName = match.group("name")
                 if isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug("field name is: %s", fieldName)
                     logger.debug("cur field is: %s", self._curField)
                     # Check to see if fieldName is already in the out list
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(self._curField):
                     logger.warning("Ignore blank line for current field: [%s]", self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             else:
                 if self._curRecord:
                     if not line.strip():
                         continue
                     # Parenthesised single argument: prints the same text
                     # under Python 2 and is valid syntax under Python 3.
                     print("No field associated with line %s: %s " % (self._curLineNo, line))
     logger.info("End of file now")
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info("Add last record: %s", self._curRecord)
         self._outObject.append(self._curRecord)
     # pprint.pprint(self._outObject);
     with open(outputFilename, "w") as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#6
0
    def _icrDataEntryToPDF(self, pdf, icrJson):
        """Append the fields of one entry to ``pdf``.

        The fields named in ``ICR_FILE_KEYWORDS_LIST`` are visited in list
        order; only those present in ``icrJson`` produce output.
        'GLOBAL REFERENCE' and 'COMPONENT/ENTRY POINT' are delegated to
        their dedicated writers, 'GENERAL DESCRIPTION' becomes a heading
        followed by one escaped paragraph per line, subfiles get a heading
        and are passed to ``_icrSubFileToPDF``, and every other field is a
        heading plus its converted value.

        :param pdf: object whose ``append`` method receives the output
        :param icrJson: mapping of field name to value for one entry
        """
        # Write the ICR data as a document (list) instead of
        # a table. Otherwise, the rows can become taller than
        # a page and reportlab will fail to create the pdf.

        # As we do not have a real schema to define the field order,
        # we will have to guess the order here
        for field in ICR_FILE_KEYWORDS_LIST:
            if field not in icrJson:
                # this entry does not have the field
                continue
            value = icrJson[field]
            if field == "GLOBAL REFERENCE":
                self._writeGlobalReferenceToPDF(value, pdf)
            elif field == "COMPONENT/ENTRY POINT":
                self._writeComponentEntryPointToPDF(value, pdf)
            elif field == "GENERAL DESCRIPTION":
                # A single value is treated like a one-line list
                lines = value if isinstance(value, list) else [value]
                description = [
                    Paragraph('GENERAL DESCRIPTION', styles['Heading3'])
                ]
                description.extend(
                    Paragraph(cgi.escape(line), styles['Normal'])
                    for line in lines)
                pdf.append(KeepTogether(description))
            elif isSubFile(field):
                pdf.append(Paragraph(field, styles['Heading3']))
                self._icrSubFileToPDF(pdf, value, field)
            else:
                converted = self._convertIndividualFieldValuePDF(field, value)
                heading = Paragraph(field, styles['Heading3'])
                pdf.append(KeepTogether([heading, converted]))
示例#7
0
    def _icrDataEntryToPDF(self, pdf, icrJson):
        """Append the fields of one entry to ``pdf``.

        The fields named in ``ICR_FILE_KEYWORDS_LIST`` are visited in list
        order; only those present in ``icrJson`` produce output.
        'GLOBAL REFERENCE' and 'COMPONENT/ENTRY POINT' are delegated to
        their dedicated writers, 'GENERAL DESCRIPTION' becomes a heading
        followed by one escaped paragraph per line, subfiles get a heading
        and are passed to ``_icrSubFileToPDF``, and every other field is a
        heading plus its converted value.

        :param pdf: object whose ``append`` method receives the output
        :param icrJson: mapping of field name to value for one entry
        """
        # Write the ICR data as a document (list) instead of
        # a table. Otherwise, the rows can become taller than
        # a page and reportlab will fail to create the pdf.

        # As we do not have a real schema to define the field order,
        # we will have to guess the order here
        for field in ICR_FILE_KEYWORDS_LIST:
            if field not in icrJson:
                # this entry does not have the field
                continue
            value = icrJson[field]
            if field == "GLOBAL REFERENCE":
                self._writeGlobalReferenceToPDF(value, pdf)
                continue
            if field == "COMPONENT/ENTRY POINT":
                self._writeComponentEntryPointToPDF(value, pdf)
                continue
            if field == "GENERAL DESCRIPTION":
                # A single value is treated like a one-line list
                textLines = value if isinstance(value, list) else [value]
                description = [Paragraph('GENERAL DESCRIPTION', styles['Heading3'])]
                for textLine in textLines:
                    description.append(Paragraph(cgi.escape(textLine), styles['Normal']))
                pdf.append(KeepTogether(description))
                continue
            if isSubFile(field):
                pdf.append(Paragraph(field, styles['Heading3']))
                self._icrSubFileToPDF(pdf, value, field)
                continue
            value = self._convertIndividualFieldValuePDF(field, value)
            row = [Paragraph(field, styles['Heading3']), value]
            pdf.append(KeepTogether(row))
示例#8
0
def _icrSubFileToHtml(output, icrJson, subFile, crossRef):
    """Write every entry of a subfile as an HTML list item.

    Each entry of ``icrJson`` becomes one ``<li>`` element. Fields are
    emitted in the order given by ``SUBFILE_FIELDS[subFile]``. A field
    that is itself a subfile (and is not ``subFile``) is rendered as a
    nested ``<ol>`` by calling this function again.

    :param output: object whose ``write`` method receives the HTML
    :param icrJson: iterable of entries belonging to the subfile
    :param subFile: name of the subfile being rendered
    :param crossRef: passed through to the field value converter
    """
    columns = SUBFILE_FIELDS[subFile]
    if subFile not in columns:
        # The subfile name is added to the list held by SUBFILE_FIELDS
        columns.append(subFile)
    for entry in icrJson:
        output.write("<li>\n")
        for name in columns:
            if name not in entry:
                # this entry does not have the field
                continue
            raw = entry[name]
            # avoid recursive subfile for now
            if not isSubFile(name) or name == subFile:
                text = _convertIndividualFieldValue(name, entry, raw, crossRef)
                output.write("<dt>%s:  &nbsp;&nbsp;%s</dt>\n" % (name, text))
                continue
            output.write("<dl><dt>%s:</dt>\n" % name)
            output.write("<dd>\n")
            output.write("<ol>\n")
            _icrSubFileToHtml(output, raw, name, crossRef)
            output.write("</ol>\n")
            output.write("</dd></dl>\n")
        output.write("</li>\n")
示例#9
0
def _icrSubFileToHtml(output, icrJson, subFile, crossRef):
    """Write every entry of a subfile as an HTML list item.

    Each entry of ``icrJson`` becomes one ``<li>`` element. Fields are
    emitted in the order given by ``SUBFILE_FIELDS[subFile]``. A nested
    subfile whose value is a list is handed to ``_writeTableOfValue``;
    any other nested subfile value is rendered by calling this function
    again inside a ``<dl>`` wrapper.

    :param output: object whose ``write`` method receives the HTML
    :param icrJson: iterable of entries belonging to the subfile
    :param subFile: name of the subfile being rendered
    :param crossRef: passed through to the helpers that render values
    """
    columns = SUBFILE_FIELDS[subFile]
    if subFile not in columns:
        # The subfile name is added to the list held by SUBFILE_FIELDS
        columns.append(subFile)
    for entry in icrJson:
        output.write("<li>\n")
        for name in columns:
            if name not in entry:
                # this entry does not have the field
                continue
            raw = entry[name]
            # avoid recursive subfile for now
            if not isSubFile(name) or name == subFile:
                text = _convertIndividualFieldValue(name, entry, raw, crossRef)
                output.write("<dt>%s:  &nbsp;&nbsp;%s</dt>\n" % (name, text))
            elif type(raw) is list:
                _writeTableOfValue(output, name, raw, crossRef)
            else:
                output.write("<dl><dt>%s:</dt>\n" % name)
                output.write("<dd>\n")
                _icrSubFileToHtml(output, raw, name, crossRef)
                output.write("</dd></dl>\n")
        output.write("</li>\n")
示例#10
0
 def _icrDataEntryToHtml(self, output, icrJson):
     """Write the fields of one entry as HTML table rows.

     The fields named in ``ICR_FILE_KEYWORDS_LIST`` are visited in list
     order; each one present in ``icrJson`` yields one ``<tr>`` with the
     field name in the first cell. A subfile is rendered as an ``<ol>``
     in the second cell; any other field shows its converted value.

     :param output: object whose ``write`` method receives the HTML
     :param icrJson: mapping of field name to value for one entry
     """
     # As we do not have a real schema to define the field order,
     # we will have to guess the order here
     for name in ICR_FILE_KEYWORDS_LIST:
         if name not in icrJson:
             # this entry does not have the field
             continue
         raw = icrJson[name]
         if not isSubFile(name):
             cell = self._convertIndividualFieldValue(name, icrJson, raw)
             output.write("<tr>\n")
             output.write("<td>%s</td>\n" % name)
             output.write("<td>%s</td>\n" % cell)
             output.write("</tr>\n")
             continue
         output.write("<tr>\n")
         output.write("<td>%s</td>\n" % name)
         output.write("<td>\n")
         output.write("<ol>\n")
         self._icrSubFileToHtml(output, raw, name)
         output.write("</ol>\n")
         output.write("</td>\n")
         output.write("</tr>\n")
示例#11
0
def _icrDataEntryToHtml(output, icrJson, crossRef):
    """Write the fields of one entry as HTML table rows.

    'NUMBER' followed by the names in ``ICR_FILE_KEYWORDS_LIST`` are
    visited in order; each one present in ``icrJson`` yields one ``<tr>``
    with the field name in the first cell. A subfile whose value is a
    non-empty list of dicts is rendered by ``_writeTableOfValue``; any
    other subfile value goes to ``_icrSubFileToHtml``. Every other field
    shows its converted value.

    :param output: object whose ``write`` method receives the HTML
    :param icrJson: mapping of field name to value for one entry
    :param crossRef: passed through to the helpers that render values
    """
    fieldList = ['NUMBER'] + ICR_FILE_KEYWORDS_LIST
    # As we do not have a real schema to define the field order,
    # we will have to guess the order here
    for field in fieldList:
        if field not in icrJson:
            # this entry does not have the field
            continue
        value = icrJson[field]
        if isSubFile(field):
            output.write("<tr>\n")
            output.write("<td>%s</td>\n" % field)
            output.write("<td>\n")
            # Check the list is non-empty before peeking at value[0];
            # an empty list would otherwise raise IndexError.
            if isinstance(value, list) and value and isinstance(value[0], dict):
                _writeTableOfValue(output, field, value, crossRef)
            else:
                _icrSubFileToHtml(output, value, field, crossRef)
            output.write("</td>\n")
            output.write("</tr>\n")
            continue
        value = _convertIndividualFieldValue(field, icrJson, value,
                                             crossRef)
        output.write("<tr>\n")
        output.write("<td>%s</td>\n" % field)
        output.write("<td>%s</td>\n" % value)
        output.write("</tr>\n")
示例#12
0
def _icrDataEntryToHtml(output, icrJson, crossRef):
    """Write the fields of one entry as HTML table rows.

    'NUMBER' followed by the names in ``ICR_FILE_KEYWORDS_LIST`` are
    visited in order; each one present in ``icrJson`` yields one ``<tr>``
    with the field name in the first cell. A subfile whose value is a
    non-empty list of dicts is rendered by ``_writeTableOfValue``; any
    other subfile value goes to ``_icrSubFileToHtml``. Every other field
    shows its converted value.

    :param output: object whose ``write`` method receives the HTML
    :param icrJson: mapping of field name to value for one entry
    :param crossRef: passed through to the helpers that render values
    """
    fieldList = ['NUMBER'] + ICR_FILE_KEYWORDS_LIST
    # As we do not have a real schema to define the field order,
    # we will have to guess the order here
    for field in fieldList:
        if field not in icrJson:
            # this entry does not have the field
            continue
        value = icrJson[field]
        if not isSubFile(field):
            value = _convertIndividualFieldValue(field, icrJson, value,
                                                 crossRef)
            output.write("<tr>\n")
            output.write("<td>%s</td>\n" % field)
            output.write("<td>%s</td>\n" % value)
            output.write("</tr>\n")
            continue
        output.write("<tr>\n")
        output.write("<td>%s</td>\n" % field)
        output.write("<td>\n")
        # Check the list is non-empty before peeking at value[0];
        # an empty list would otherwise raise IndexError.
        isTable = (isinstance(value, list) and bool(value)
                   and isinstance(value[0], dict))
        if isTable:
            _writeTableOfValue(output, field, value, crossRef)
        else:
            _icrSubFileToHtml(output, value, field, crossRef)
        output.write("</td>\n")
        output.write("</tr>\n")
示例#13
0
    def parse(self, inputFilename, outputFilename):
        """Parse ``inputFilename`` line by line and dump the records as JSON.

        Four local flags track whether the parser is inside one of the
        free text fields ('DBA Comments', 'GENERAL DESCRIPTION',
        'SUBSCRIBING DETAILS', 'COMPONENT DESCRIPTION'). While a flag is
        set, lines that look like field keywords are appended to that
        field instead of starting a new one, until one of the terminating
        conditions checked below is met.

        After the last line the pending record is added to
        ``self._outObject``, the directory of ``outputFilename`` is
        created when missing, and the result is written as JSON with an
        indent of 4.

        :param inputFilename: path of the text file to read
        :param outputFilename: path of the JSON file to write
        """
        with open(inputFilename, 'r') as ICRFile:
            curLineNo = 0
            curNumber = None
            # Free text fields may contain field names and
            # need special parsing rules
            DBAComments = False
            generalDescription = False
            subscribingDetails = False
            componentDescription = False

            for line in ICRFile:
                line = line.rstrip("\r\n")
                curLineNo += 1
                # get rid of lines that are ignored
                if self.isIgnoredLine(line):
                    continue
                match = INTEGRATION_REFERENCES_LIST.match(line)
                if match:
                    # Skip this line. Use getDate() to parse date
                    continue
                match = START_OF_RECORD.match(line)
                if match:
                    name = match.group('name')
                    number = match.group('number')
                    skipField = False
                    isFreeTextField = DBAComments or generalDescription or \
                        subscribingDetails or componentDescription
                    if isFreeTextField:
                        # Check if the number matches what
                        # we're currently processing
                        skipField = number == curNumber
                    if not skipField:
                        curNumber = number

                        DBAComments = False
                        generalDescription = False
                        subscribingDetails = False
                        componentDescription = False

                        self._startOfNewItem(name, number, match, line)
                        continue

                match = GENERIC_START_OF_RECORD.search(line)
                if not match:
                    # DBA Comments doesn't match regex for other fields,
                    # check separately. Even if we get a match here, can't
                    # assume that we're in a DBA Comments field, might be in
                    # a different free text field
                    match = DBA_COMMENTS.match(line)

                if match and match.group('name') in ICR_FILE_KEYWORDS:
                    fieldName = match.group('name')

                    # First check if we are at the end of a free text field
                    if DBAComments:
                        if fieldName in ['DATE/TIME EDITED', 'NUMBER', 'DATE ACTIVATED']:
                            DBAComments = False
                    elif generalDescription:
                        # Starts with exactly 2 spaces
                        if line.startswith("  STATUS:") or fieldName == 'VIEWER':
                            generalDescription = False
                    elif subscribingDetails:
                        # This assumes that 'Subscribing Details' may start
                        # with a field name or may contain 'GLOBAL REFERENCE'
                        # but won't contain any other field names in the middle
                        if fieldName in ICR_FILE_KEYWORDS and \
                          fieldName != 'GLOBAL REFERENCE' and \
                          'SUBSCRIBING DETAILS' in self._curRecord:
                            subscribingDetails = False
                    elif componentDescription:
                        # At most one space before 'VARIABLES:'
                        if line.startswith("VARIABLES:") or \
                          line.startswith(" VARIABLES:") or \
                          fieldName in ['COMPONENT/ENTRY POINT', 'SUBSCRIBING PACKAGE']:
                            componentDescription = False

                    # Are we at the beginning of a free text field?
                    if DBAComments or generalDescription or \
                      subscribingDetails or componentDescription:
                        # Free text fields are never nested
                        pass
                    elif fieldName == 'DBA Comments':
                        DBAComments = True
                    elif fieldName == 'GENERAL DESCRIPTION':
                        generalDescription = True
                    elif fieldName == 'SUBSCRIBING DETAILS':
                        subscribingDetails = True
                    elif fieldName == 'COMPONENT DESCRIPTION':
                        componentDescription = True

                    # Process line
                    # Start with free text fields
                    if DBAComments:
                        fieldName = 'DBA Comments'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            self._curField = fieldName
                            self._rewindStack()
                            name = match.group('name') # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif generalDescription:
                        fieldName = 'GENERAL DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process general description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)
                    elif subscribingDetails:
                        fieldName = 'SUBSCRIBING DETAILS'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            self._curField = fieldName
                            name = match.group('name') # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif componentDescription:
                        fieldName = 'COMPONENT DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process component description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)

                    elif isSubFile(fieldName):
                        self._curField = fieldName
                        self._startOfSubFile(match, line)
                    else:
                        # Check to see if fieldName is already in the out list
                        if isWordProcessingField(self._curField):
                            if self._ignoreKeywordInWordProcessingFields(fieldName):
                                self._appendWordsFieldLine(line)
                                continue
                        # figure out where to store the record
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif self._curField and self._curField in self._curRecord:
                    if not line.strip() and not isWordProcessingField(self._curField):
                        # Ignore blank line
                        continue
                    self._appendWordsFieldLine(line)
                else:
                    if self._curRecord:
                        if not line.strip():
                            continue
                        # Let the logging module do the formatting so the
                        # message is only built when it is actually emitted.
                        logger.error('No field associated with line %s: %s ',
                                     curLineNo, line)
        # TODO: Copy + paste from '_startOfNewItem()'
        self._curField = None
        self._rewindStack()
        if self._curRecord:
            self._outObject.append(self._curRecord)
        outputDir = os.path.dirname(outputFilename)
        # dirname() is '' for a bare file name; os.makedirs('') would raise,
        # so only create the directory when there is one to create.
        if outputDir and not os.path.exists(outputDir):
            # Will also create intermediate directories if needed
            os.makedirs(outputDir)
        with open(outputFilename, 'w') as out_file:
            json.dump(self._outObject, out_file, indent=4)
示例#14
0
 def parse(self, inputFilename, outputFilename):
     """Parse ``inputFilename`` line by line and dump the records as JSON.

     Every line is stripped of trailing CR/LF and counted in
     ``self._curLineNo``. A line matching ``INTEGRATION_REFERENCES_LIST``
     stores its first group in the module-level ``date``. The flags
     ``self._DBAComments`` and ``self._generalDescription`` suppress the
     start of a new item while they are set. After the last line the
     pending record is added to ``self._outObject``, which is then
     written to ``outputFilename`` with an indent of 4.

     :param inputFilename: path of the text file to read
     :param outputFilename: path of the JSON file to write
     """
     global date
     with open(inputFilename, 'r') as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = INTEGRATION_REFERENCES_LIST.match(line)
             if match:
                 date = match.group(1).strip()
                 continue
             match = START_OF_RECORD.match(line)
             if match and not self._DBAComments and not self._generalDescription:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 match = DBA_COMMENTS.match(line)
                 if match:
                     self._DBAComments = True
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if fieldName == 'DBA Comments':
                     self._DBAComments = True
                 elif fieldName == 'GENERAL DESCRIPTION':
                     self._generalDescription = True
                 # NOTE(review): fieldName is already known to be in
                 # ICR_FILE_KEYWORDS here, so the flag is always cleared
                 # and the 'DBA Comments' branch below looks unreachable.
                 # Behavior is kept as-is; confirm the intended end-of-field
                 # condition.
                 if self._DBAComments:
                     if fieldName in ICR_FILE_KEYWORDS:
                         self._DBAComments = False
                 elif self._generalDescription:
                     if line.startswith("  STATUS:"):  # Starts with exactly 2 spaces
                         self._generalDescription = False
                 if self._DBAComments:
                     fieldName = 'DBA Comments'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name') # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif self._generalDescription:
                     fieldName = 'GENERAL DESCRIPTION'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         # Starting to process general description
                         self._curField = fieldName
                         self._rewindStack()
                         self._findKeyValueInLine(match, line, self._curRecord)
                 elif isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug('field name is: %s', fieldName)
                     logger.debug('cur field is: %s', self._curField)
                     # Check to see if fieldName is already in the out list
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(self._curField):
                     logger.warning('Ignore blank line for current field: [%s]', self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             else:
                 if self._curRecord:
                     if not line.strip():
                         continue
                     # Parenthesised single argument: prints the same text
                     # under Python 2 and is valid syntax under Python 3.
                     print('No field associated with line %s: %s ' % (self._curLineNo, line))
     logger.info('End of file now')
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info('Add last record: %s', self._curRecord)
         self._outObject.append(self._curRecord)
     # pprint.pprint(self._outObject);
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#15
0
    def parse(self, inputFilename, outputFilename):
        """Parse an ICR text file and dump the collected records as JSON.

        The input is read line by line.  Lines rejected by
        ``isIgnoredLine()`` and lines matching ``INTEGRATION_REFERENCES_LIST``
        are skipped.  A line matching ``START_OF_RECORD`` starts a new item
        unless it carries the number of the record whose free text field is
        currently being read.  Lines that carry a name from
        ``ICR_FILE_KEYWORDS`` start (or, inside free text, extend) a field;
        remaining lines are appended to the current field when that field
        is already present in the current record.

        Four free text fields ('DBA Comments', 'GENERAL DESCRIPTION',
        'SUBSCRIBING DETAILS' and 'COMPONENT DESCRIPTION') may contain text
        that looks like a field name, so one flag per field records whether
        the parser is currently inside it.

        :param inputFilename: path of the ICR text file to read.
        :param outputFilename: path of the JSON file to write; a missing
            parent directory is created first.
        """
        with open(inputFilename, 'r') as ICRFile:
            curNumber = None
            # Free text fields may contain field names and
            # need special parsing rules
            DBAComments = False
            generalDescription = False
            subscribingDetails = False
            componentDescription = False

            # curLineNo is 1-based and counts every line, ignored or not
            for curLineNo, line in enumerate(ICRFile, 1):
                line = line.rstrip("\r\n")
                # get rid of lines that are ignored
                if self.isIgnoredLine(line):
                    continue
                if INTEGRATION_REFERENCES_LIST.match(line):
                    # Skip this line. Use getDate() to parse date
                    continue
                match = START_OF_RECORD.match(line)
                if match:
                    name = match.group('name')
                    number = match.group('number')
                    skipField = False
                    isFreeTextField = DBAComments or generalDescription or \
                        subscribingDetails or componentDescription
                    if isFreeTextField:
                        # Inside free text a "start of record" line that
                        # repeats the current record number is not a new
                        # record; it falls through to the field handling
                        skipField = number == curNumber
                    if not skipField:
                        curNumber = number

                        # A new record ends any free text field
                        DBAComments = False
                        generalDescription = False
                        subscribingDetails = False
                        componentDescription = False

                        self._startOfNewItem(name, number, match, line)
                        continue

                match = GENERIC_START_OF_RECORD.search(line)
                if not match:
                    # DBA Comments doesn't match regex for other fields,
                    # check separately. Even if we get a match here, can't
                    # assume that we're in a DBA Comments field, might be in
                    # a different free text field
                    match = DBA_COMMENTS.match(line)

                if match and match.group('name') in ICR_FILE_KEYWORDS:
                    fieldName = match.group('name')

                    # First check if we are at the end of a free text field
                    if DBAComments:
                        if fieldName in ('DATE/TIME EDITED', 'NUMBER',
                                         'DATE ACTIVATED'):
                            DBAComments = False
                    elif generalDescription:
                        # Starts with exactly 2 spaces
                        if line.startswith("  STATUS:") or \
                          fieldName == 'VIEWER':
                            generalDescription = False
                    elif subscribingDetails:
                        # This assumes that 'Subscribing Details' may start
                        # with a field name or may contain 'GLOBAL REFERENCE'
                        # but won't contain any other field names in the
                        # middle.  fieldName is already known to be one of
                        # ICR_FILE_KEYWORDS in this branch.
                        if fieldName != 'GLOBAL REFERENCE' and \
                          'SUBSCRIBING DETAILS' in self._curRecord:
                            subscribingDetails = False
                    elif componentDescription:
                        # At most one space before 'VARIABLES:'
                        if line.startswith(("VARIABLES:", " VARIABLES:")) or \
                          fieldName in ('COMPONENT/ENTRY POINT',
                                        'SUBSCRIBING PACKAGE'):
                            componentDescription = False

                    # Are we at the beginning of a free text field?
                    if DBAComments or generalDescription or \
                      subscribingDetails or componentDescription:
                        # Free text fields are never nested
                        pass
                    elif fieldName == 'DBA Comments':
                        DBAComments = True
                    elif fieldName == 'GENERAL DESCRIPTION':
                        generalDescription = True
                    elif fieldName == 'SUBSCRIBING DETAILS':
                        subscribingDetails = True
                    elif fieldName == 'COMPONENT DESCRIPTION':
                        componentDescription = True

                    # Process line
                    # Start with free text fields
                    if DBAComments:
                        fieldName = 'DBA Comments'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            self._curField = fieldName
                            self._rewindStack()
                            name = match.group('name')  # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif generalDescription:
                        fieldName = 'GENERAL DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process general description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)
                    elif subscribingDetails:
                        fieldName = 'SUBSCRIBING DETAILS'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Unlike the other free text fields, the stack
                            # is not rewound here
                            self._curField = fieldName
                            name = match.group('name')  # this is the name part
                            restOfLine = line[match.end():]
                            self._curRecord[name] = restOfLine.strip()
                    elif componentDescription:
                        fieldName = 'COMPONENT DESCRIPTION'
                        if self._curField == fieldName:
                            self._appendWordsFieldLine(line)
                        else:
                            # Starting to process component description
                            self._curField = fieldName
                            self._rewindStack()
                            self._findKeyValueInLine(match, line)

                    elif isSubFile(fieldName):
                        self._curField = fieldName
                        self._startOfSubFile(match, line)
                    else:
                        # Check to see if fieldName is already in the out list
                        if isWordProcessingField(self._curField):
                            if self._ignoreKeywordInWordProcessingFields(
                                    fieldName):
                                self._appendWordsFieldLine(line)
                                continue
                        # figure out where to store the record
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif self._curField and self._curField in self._curRecord:
                    if not line.strip() and not isWordProcessingField(
                            self._curField):
                        # Ignore blank line
                        continue
                    self._appendWordsFieldLine(line)
                else:
                    if self._curRecord:
                        if not line.strip():
                            continue
                        # Lazy %-args: formatted only if the record is emitted
                        logger.error('No field associated with line %s: %s ',
                                     curLineNo, line)
        # TODO: Copy + paste from '_startOfNewItem()'
        self._curField = None
        self._rewindStack()
        if self._curRecord:
            self._outObject.append(self._curRecord)
        outputDir = os.path.dirname(outputFilename)
        # dirname() is '' for a bare filename (output in the current
        # directory); os.makedirs('') would raise, so skip it in that case
        if outputDir and not os.path.exists(outputDir):
            # Will also create intermediate directories if needed
            os.makedirs(outputDir)
        with open(outputFilename, 'w') as out_file:
            json.dump(self._outObject, out_file, indent=4)
示例#16
0
 def parse(self, inputFilename, outputFilename):
     """Parse an ICR text file and dump the collected records as JSON.

     The input is read line by line.  Lines rejected by ``isIgnoredLine()``
     and lines matching ``INTEGRATION_REFERENCES_LIST`` are skipped.  A line
     matching ``START_OF_RECORD`` starts a new item unless one of the free
     text flags is set.  Lines carrying a name from ``ICR_FILE_KEYWORDS``
     start a field (or a sub file); other lines are appended to the current
     field when that field is already present in the current record.

     :param inputFilename: path of the ICR text file to read.
     :param outputFilename: path of the JSON file to write; a missing
         parent directory is created first.
     """
     with open(inputFilename, 'r') as ICRFile:
         curLineNo = 0
         # Set while inside the corresponding free text field, whose text
         # may look like the start of a record
         DBAComments = False
         generalDescription = False
         for line in ICRFile:
             line = line.rstrip("\r\n")
             curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = INTEGRATION_REFERENCES_LIST.match(line)
             if match:
                 # Skip this line. Use getDate() to parse date
                 continue
             match = START_OF_RECORD.match(line)
             if match and not DBAComments and not generalDescription:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 # 'DBA Comments' is matched by its own pattern
                 match = DBA_COMMENTS.match(line)
                 if match:
                     DBAComments = True
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if fieldName == 'DBA Comments':
                     DBAComments = True
                 elif fieldName == 'GENERAL DESCRIPTION':
                     generalDescription = True
                 if DBAComments:
                     # NOTE(review): fieldName is always one of
                     # ICR_FILE_KEYWORDS inside this branch, so the flag is
                     # cleared on every keyword line and the 'DBA Comments'
                     # branch below can never run.  Kept unchanged until the
                     # intended end-of-field rule is confirmed.
                     if fieldName in ICR_FILE_KEYWORDS:
                         DBAComments = False
                 elif generalDescription:
                     # Starts with exactly 2 spaces
                     if line.startswith("  STATUS:"):
                         generalDescription = False
                 if DBAComments:
                     fieldName = 'DBA Comments'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name')  # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif generalDescription:
                     fieldName = 'GENERAL DESCRIPTION'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         # Starting to process general description
                         self._curField = fieldName
                         self._rewindStack()
                         self._findKeyValueInLine(match, line,
                                                  self._curRecord)
                 elif isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     # Check to see if fieldName is already in the out list
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(
                                 fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(
                         self._curField):
                     # Ignore blank line
                     continue
                 self._appendWordsFieldLine(line)
             else:
                 if self._curRecord:
                     if not line.strip():
                         continue
                     # Lazy %-args: formatted only if the record is emitted
                     logger.debug('No field associated with line %s: %s ',
                                  curLineNo, line)
     # Close whatever is still open on the stack, then keep the last record
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         self._outObject.append(self._curRecord)
     outputDir = os.path.dirname(outputFilename)
     # dirname() is '' for a bare filename (output in the current
     # directory); os.makedirs('') would raise, so skip it in that case
     if outputDir and not os.path.exists(outputDir):
         # Will also create intermediate directories if needed
         os.makedirs(outputDir)
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)
示例#17
0
 def parse(self, inputFilename, outputFilename):
     """Parse an ICR text file and dump the collected records as JSON.

     The input is read line by line.  Lines rejected by ``isIgnoredLine()``
     are skipped.  A line matching ``INTEGRATION_REFERENCES_LIST`` stores its
     first group, stripped, in the module-level ``date`` and is otherwise
     skipped.  A line matching ``START_OF_RECORD`` starts a new item unless
     ``self._DBAComments`` or ``self._generalDescription`` is set.  Lines
     carrying a name from ``ICR_FILE_KEYWORDS`` start a field (or a sub
     file); other lines are appended to the current field when that field
     is already present in the current record.

     :param inputFilename: path of the ICR text file to read.
     :param outputFilename: path of the JSON file to write.
     """
     global date
     with open(inputFilename, 'r') as ICRFile:
         for line in ICRFile:
             line = line.rstrip("\r\n")
             self._curLineNo += 1
             # get rid of lines that are ignored
             if self.isIgnoredLine(line):
                 continue
             match = INTEGRATION_REFERENCES_LIST.match(line)
             if match:
                 date = match.group(1).strip()
                 continue
             match = START_OF_RECORD.match(line)
             if match and not self._DBAComments and not self._generalDescription:
                 self._startOfNewItem(match, line)
                 continue
             match = GENERIC_START_OF_RECORD.search(line)
             if not match:
                 # 'DBA Comments' is matched by its own pattern
                 match = DBA_COMMENTS.match(line)
                 if match:
                     self._DBAComments = True
             if match and match.group('name') in ICR_FILE_KEYWORDS:
                 fieldName = match.group('name')
                 if fieldName == 'DBA Comments':
                     self._DBAComments = True
                 elif fieldName == 'GENERAL DESCRIPTION':
                     self._generalDescription = True
                 if self._DBAComments:
                     # NOTE(review): fieldName is always one of
                     # ICR_FILE_KEYWORDS inside this branch, so the flag is
                     # cleared on every keyword line and the 'DBA Comments'
                     # branch below can never run.  Kept unchanged until the
                     # intended end-of-field rule is confirmed.
                     if fieldName in ICR_FILE_KEYWORDS:
                         self._DBAComments = False
                 elif self._generalDescription:
                     # Starts with exactly 2 spaces
                     if line.startswith("  STATUS:"):
                         self._generalDescription = False
                 if self._DBAComments:
                     fieldName = 'DBA Comments'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name')  # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif self._generalDescription:
                     fieldName = 'GENERAL DESCRIPTION'
                     if self._curField == fieldName:
                         self._appendWordsFieldLine(line)
                     else:
                         self._curField = fieldName
                         name = match.group('name')  # this is the name part
                         restOfLine = line[match.end():]
                         self._curRecord[name] = restOfLine.strip()
                 elif isSubFile(fieldName):
                     self._curField = fieldName
                     self._startOfSubFile(match, line)
                 else:
                     logger.debug('field name is: %s', fieldName)
                     logger.debug('cur field is: %s', self._curField)
                     # Check to see if fieldName is already in the out list
                     if isWordProcessingField(self._curField):
                         if self._ignoreKeywordInWordProcessingFields(fieldName):
                             self._appendWordsFieldLine(line)
                             continue
                     # figure out where to store the record
                     self._curField = fieldName
                     self._rewindStack()
                     self._findKeyValueInLine(match, line, self._curRecord)
             elif self._curField and self._curField in self._curRecord:
                 if not line.strip() and not isWordProcessingField(self._curField):
                     # Logger.warn is a deprecated alias of Logger.warning
                     logger.warning('Ignore blank line for current field: [%s]', self._curField)
                     continue
                 self._appendWordsFieldLine(line)
             else:
                 if self._curRecord:
                     if not line.strip():
                         continue
                     # Parenthesised single-argument form prints the same
                     # text on Python 2 and is valid syntax on Python 3
                     print('No field associated with line %s: %s ' % (self._curLineNo, line))
     logger.info('End of file now')
     # Close whatever is still open on the stack, then keep the last record
     if len(self._curStack) > 0:
         self._curField = None
         self._rewindStack()
     if self._curRecord:
         logger.info('Add last record: %s', self._curRecord)
         self._outObject.append(self._curRecord)
     with open(outputFilename, 'w') as out_file:
         json.dump(self._outObject, out_file, indent=4)