def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and dump the collected records as JSON.

    Every input line is handled as exactly one of: an ignored line, the
    start of a new record, a line starting a field whose name is listed in
    ``ICR_FILE_KEYWORDS``, a continuation of the current field, or a line
    with no field to attach to.  After the last line the pending record
    (if any) is appended to ``self._outObject`` and the whole list is
    written to *outputFilename*.

    :param inputFilename: path of the ICR text file to read.
    :param outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, 'r') as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = START_OF_RECORD.match(line)
            if match:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # Fall back to the separate DBA Comments pattern
                match = DBA_COMMENTS.match(line)
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    logger.debug('field name is: %s', fieldName)
                    logger.debug('cur field is: %s', self._curField)
                    # While inside a word-processing field, the helper
                    # decides whether this line is appended as text
                    # instead of starting a new field.
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(
                                fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(
                        self._curField):
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning(
                        'Ignore blank line for current field: [%s]',
                        self._curField)
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Single parenthesised argument: same output under the
                    # Python 2 print statement and the Python 3 function.
                    print('No field associated with line %s: %s ' % (
                        self._curLineNo, line))
    logger.info('End of file now')
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info('Add last record: %s', self._curRecord)
        self._outObject.append(self._curRecord)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Read the ICR text file line by line and write the records as JSON.

    Lines are dispatched in this order: ignored lines are dropped, a
    ``START_OF_RECORD`` match starts a new item, a field whose name is in
    ``ICR_FILE_KEYWORDS`` starts a sub-file or a plain field, any other
    line is appended to the current field when that field already exists
    in the current record, and remaining non-blank lines are reported.
    The last open record is appended to ``self._outObject`` before the
    list is dumped to *outputFilename*.

    :param inputFilename: path of the ICR text file to read.
    :param outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, "r") as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = START_OF_RECORD.match(line)
            if match:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # Fall back to the separate DBA Comments pattern
                match = DBA_COMMENTS.match(line)
            if match and match.group("name") in ICR_FILE_KEYWORDS:
                fieldName = match.group("name")
                if isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                    continue
                logger.debug("field name is: %s", fieldName)
                logger.debug("cur field is: %s", self._curField)
                # Inside a word-processing field the helper decides whether
                # the line is kept as text rather than starting a field.
                if isWordProcessingField(self._curField):
                    if self._ignoreKeywordInWordProcessingFields(fieldName):
                        self._appendWordsFieldLine(line)
                        continue
                # figure out where to store the record
                self._curField = fieldName
                self._rewindStack()
                self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning("Ignore blank line for current field: [%s]",
                                   self._curField)
                    continue
                self._appendWordsFieldLine(line)
            elif self._curRecord:
                if not line.strip():
                    continue
                # Single parenthesised argument: identical output under the
                # Python 2 print statement and the Python 3 print function.
                print("No field associated with line %s: %s " % (self._curLineNo, line))
    logger.info("End of file now")
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info("Add last record: %s", self._curRecord)
        self._outObject.append(self._curRecord)
    with open(outputFilename, "w") as out_file:
        json.dump(self._outObject, out_file, indent=4)
def _findKeyValueInLine(self, match, line):
    """Store every name/value pair found on *line* in ``self._curRecord``.

    *match* is the regex match of the first field on the line; its ``name``
    group is the field name.  The text after that match is scanned with
    ``GENERIC_FIELD_RECORD`` and only hits whose name is in
    ``ICR_FILE_KEYWORDS`` count as further fields.  ``self._curField`` may
    be moved to a later field on the line.  Finally, each stored field that
    is also in ``DATE_TIME_FIELD`` is passed through
    ``self._convertDateTimeField``.
    """
    firstName = match.group('name')
    tail = line[match.end():]
    firstIsKeyword = firstName in ICR_FILE_KEYWORDS
    seenFields = [firstName] if firstIsKeyword else []

    # Collect the additional keyword fields that follow on the same line
    keywordMatches = []
    for candidate in GENERIC_FIELD_RECORD.finditer(tail):
        candidateName = candidate.group('name')
        if candidateName in ICR_FILE_KEYWORDS:  # ignore non-keyword
            keywordMatches.append(candidate)
            seenFields.append(candidateName)

    if not keywordMatches:
        # Only one field on this line
        onlyValue = tail.strip()
        if firstName == 'GENERAL DESCRIPTION':
            onlyValue = [onlyValue]
        self._curRecord[firstName] = onlyValue
    else:
        switchField = False
        lastIdx = len(keywordMatches) - 1
        for idx, cur in enumerate(keywordMatches):
            if idx == 0 and firstIsKeyword:
                firstValue = tail[:cur.start()].strip()
                self._curRecord[firstName] = firstValue
                # An empty DESCRIPTION keeps the current field unchanged
                switchField = firstName != 'DESCRIPTION' or firstValue != ""
            if idx == lastIdx:
                if isWordProcessingField(self._curField):
                    if self._ignoreKeywordInWordProcessingFields(
                            cur.group('name')):
                        self._appendWordsFieldLine(tail)
                    continue
                target = cur.group('name')
                value = tail[cur.end():].strip()
            else:
                # NOTE(review): at idx == 0 with several matches, idx - 1 is
                # -1 and addresses the last match -- confirm this is intended.
                prev = keywordMatches[idx - 1]
                target = prev.group('name')
                value = tail[prev.end():cur.start()].strip()
            if switchField:
                self._curField = target
            self._curRecord[target] = value

    for fld in set(seenFields) & DATE_TIME_FIELD:
        self._curRecord[fld] = self._convertDateTimeField(self._curRecord[fld])
def _convertIndividualFieldValue(field, icrEntry, value, crossRef):
    """Return *value* converted for display of *field*.

    * Word-processing fields: a list value is joined with newlines, the
      text is escaped with ``cgi.escape`` and wrapped in ``<pre>`` tags.
    * Fields present in ``FIELD_CONVERT_MAP``: a non-list value is passed
      to the mapped converter together with *icrEntry* and *crossRef*; a
      list value is returned unchanged.
    * Any other field: *value* is returned unchanged.

    :param field: name of the field being converted.
    :param icrEntry: forwarded as-is to the mapped converter.
    :param value: string or list of strings.
    :param crossRef: forwarded to the converter as keyword ``crossRef``.
    """
    if isWordProcessingField(field):
        if isinstance(value, list):
            value = "\n".join(value)
        return '<pre>\n' + cgi.escape(value) + '\n</pre>\n'
    if field in FIELD_CONVERT_MAP and not isinstance(value, list):
        return FIELD_CONVERT_MAP[field](value, icrEntry, crossRef=crossRef)
    return value
def _convertIndividualFieldValue(self, field, icrEntry, value):
    """Return *value* converted for display of *field*.

    * Word-processing fields: a list value is joined with newlines, the
      text is escaped with ``cgi.escape`` and wrapped in ``<pre>`` tags.
    * Fields present in ``field_convert_map``: a non-list value is passed
      to the mapped converter together with *icrEntry* and
      ``self._crossRef``; a list value is logged and returned unchanged.
    * Any other field: *value* is returned unchanged.

    :param field: name of the field being converted.
    :param icrEntry: forwarded as-is to the mapped converter.
    :param value: string or list of strings.
    """
    if isWordProcessingField(field):
        if isinstance(value, list):
            value = "\n".join(value)
        return '<pre>\n' + cgi.escape(value) + '\n</pre>\n'
    if field not in field_convert_map:
        return value
    if isinstance(value, list):
        # Logger.warn is a deprecated alias of Logger.warning
        logger.warning('field: [%s], value:[%s], icrEntry: [%s]',
                       field, value, icrEntry)
        return value
    return field_convert_map[field](value, icrEntry, crossRef=self._crossRef)
def _findKeyValueInLine(self, match, line):
    """Parse all name/value pairs on *line* into ``self._curRecord``.

    The ``name`` group of *match* is the leading field.  The remainder of
    the line is searched with ``GENERIC_FIELD_RECORD``; hits whose name is
    not in ``ICR_FILE_KEYWORDS`` are ignored.  ``self._curField`` may be
    re-pointed at a later field on the line, and stored fields that are in
    ``DATE_TIME_FIELD`` are run through ``self._convertDateTimeField``.
    """
    leadName = match.group('name')
    rest = line[match.end():]
    leadIsKeyword = leadName in ICR_FILE_KEYWORDS

    keywordHits = [hit for hit in GENERIC_FIELD_RECORD.finditer(rest)
                   if hit.group('name') in ICR_FILE_KEYWORDS]
    fieldsOnLine = [leadName] if leadIsKeyword else []
    fieldsOnLine.extend(hit.group('name') for hit in keywordHits)

    if keywordHits:
        moveCurField = False
        finalIdx = len(keywordHits) - 1
        for pos, hit in enumerate(keywordHits):
            if pos == 0 and leadIsKeyword:
                leadValue = rest[:hit.start()].strip()
                self._curRecord[leadName] = leadValue
                # An empty DESCRIPTION keeps the current field unchanged
                moveCurField = not (leadName == 'DESCRIPTION'
                                    and leadValue == "")
            if pos != finalIdx:
                # NOTE(review): at pos == 0 with several hits, pos - 1 is -1
                # and addresses the last hit -- confirm this is intended.
                before = keywordHits[pos - 1]
                key = before.group('name')
                text = rest[before.end():hit.start()].strip()
            elif isWordProcessingField(self._curField):
                if self._ignoreKeywordInWordProcessingFields(
                        hit.group('name')):
                    self._appendWordsFieldLine(rest)
                continue
            else:
                key = hit.group('name')
                text = rest[hit.end():].strip()
            if moveCurField:
                self._curField = key
            self._curRecord[key] = text
    elif leadName == 'GENERAL DESCRIPTION':
        self._curRecord[leadName] = [rest.strip()]
    else:
        self._curRecord[leadName] = rest.strip()

    dateTimeFields = set(fieldsOnLine) & DATE_TIME_FIELD
    for fld in dateTimeFields:
        self._curRecord[fld] = self._convertDateTimeField(self._curRecord[fld])
def _convertIndividualFieldValuePDF(self, field, value, writeField=False,
                                    keepTogether=True):
    """Convert *value* into ``Paragraph`` object(s) styled ``'Normal'``.

    Text of word-processing fields is passed through ``cgi.escape`` first;
    text of other fields is used verbatim.

    :param field: field name; prefixed to the text when *writeField* is true.
    :param value: a single string or a list of strings.
    :param writeField: when true, each text becomes ``"<field> : <text>"``.
    :param keepTogether: for a list value, wrap the paragraphs in
        ``KeepTogether`` instead of returning the bare list.
    :returns: one ``Paragraph`` for a non-list value; for a list value a
        ``KeepTogether`` or a list of ``Paragraph`` objects.
    """
    escapeText = isWordProcessingField(field)

    def _toParagraph(text):
        # Build one paragraph, escaping and labelling the text as required.
        if escapeText:
            text = cgi.escape(text)
        if writeField:
            # TODO: "Field:" should not be styled as 'Code'
            text = "%s : %s" % (field, text)
        return Paragraph(text, styles['Normal'])

    if isinstance(value, list):
        cell = [_toParagraph(item) for item in value]
        return KeepTogether(cell) if keepTogether else cell
    return _toParagraph(value)
def _convertIndividualFieldValuePDF(field, value, writeField=False,
                                    keepTogether=True):
    """Convert *value* into ``Paragraph`` object(s) styled ``'Normal'``.

    Text of word-processing fields is passed through ``cgi.escape`` first;
    text of other fields is used verbatim.

    :param field: field name; prefixed to the text when *writeField* is true.
    :param value: a single string or a list of strings.
    :param writeField: when true, each text becomes ``"<field> : <text>"``.
    :param keepTogether: for a list value, wrap the paragraphs in
        ``KeepTogether`` instead of returning the bare list.
    :returns: one ``Paragraph`` for a non-list value; for a list value a
        ``KeepTogether`` or a list of ``Paragraph`` objects.
    """
    escapeText = isWordProcessingField(field)

    def _toParagraph(text):
        # Build one paragraph, escaping and labelling the text as required.
        if escapeText:
            text = cgi.escape(text)
        if writeField:
            # TODO: "Field:" should not be styled as 'Code'
            text = "%s : %s" % (field, text)
        return Paragraph(text, STYLES['Normal'])

    if isinstance(value, list):
        cell = [_toParagraph(item) for item in value]
        return KeepTogether(cell) if keepTogether else cell
    return _toParagraph(value)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    Four local flags track whether the parser is inside a free text field
    (DBA Comments, General Description, Subscribing Details, Component
    Description).  While one is set, lines that look like fields are
    appended to that field until one of the end conditions coded below is
    met.  After the last line the pending record (if any) is appended to
    ``self._outObject``; the directory of *outputFilename* is created when
    missing and the list is dumped there as JSON.

    :param inputFilename: path of the ICR text file to read.
    :param outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, 'r') as ICRFile:
        curLineNo = 0
        curNumber = None
        # Free text fields may contain field names and
        # need special parsing rules
        DBAComments = False
        generalDescription = False
        subscribingDetails = False
        componentDescription = False
        for line in ICRFile:
            line = line.rstrip("\r\n")
            curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                # Skip this line. Use getDate() to parse date
                continue
            match = START_OF_RECORD.match(line)
            if match:
                name = match.group('name')
                number = match.group('number')
                skipField = False
                isFreeTextField = DBAComments or generalDescription or \
                    subscribingDetails or componentDescription
                if isFreeTextField:
                    # Check if the number matches what
                    # we're currently processing
                    skipField = number == curNumber
                if not skipField:
                    curNumber = number
                    DBAComments = False
                    generalDescription = False
                    subscribingDetails = False
                    componentDescription = False
                    self._startOfNewItem(name, number, match, line)
                    continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # DBA Comments doesn't match regex for other fields,
                # check separately. Even if we get a match here, can't
                # assume that we're in a DBA Comments field, might be in
                # a different free text field
                match = DBA_COMMENTS.match(line)
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                # First check if we are at the end of a free text field
                if DBAComments:
                    if fieldName in [
                            'DATE/TIME EDITED', 'NUMBER', 'DATE ACTIVATED'
                    ]:
                        DBAComments = False
                elif generalDescription:
                    # Starts with exactly 2 spaces
                    # NOTE(review): the literal below shows one leading
                    # space -- confirm against the comment above.
                    if line.startswith(
                            " STATUS:") or fieldName == 'VIEWER':
                        generalDescription = False
                elif subscribingDetails:
                    # This assumes that 'Subscribing Details' may start
                    # with a field name or may contain 'GLOBAL REFERENCE'
                    # but won't contain any other field names in the middle
                    if fieldName in ICR_FILE_KEYWORDS and \
                            fieldName != 'GLOBAL REFERENCE' and \
                            'SUBSCRIBING DETAILS' in self._curRecord:
                        subscribingDetails = False
                elif componentDescription:
                    # At most one space before 'VARIABLES:'
                    if line.startswith("VARIABLES:") or \
                            line.startswith(" VARIABLES:") or \
                            fieldName in ['COMPONENT/ENTRY POINT',
                                          'SUBSCRIBING PACKAGE']:
                        componentDescription = False

                # Are we at the beginning of a free text field?
                if DBAComments or generalDescription or \
                        subscribingDetails or componentDescription:
                    # Free text fields are never nested
                    pass
                elif fieldName == 'DBA Comments':
                    DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    generalDescription = True
                elif fieldName == 'SUBSCRIBING DETAILS':
                    subscribingDetails = True
                elif fieldName == 'COMPONENT DESCRIPTION':
                    componentDescription = True

                # Process line
                # Start with free text fields
                if DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        self._rewindStack()
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif subscribingDetails:
                    fieldName = 'SUBSCRIBING DETAILS'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif componentDescription:
                    fieldName = 'COMPONENT DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process component description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(
                                fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(
                        self._curField):
                    # Ignore blank line
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Lazy %-args: same message, formatted only if emitted
                    logger.error('No field associated with line %s: %s ',
                                 curLineNo, line)
    # TODO: Copy + paste from '_startOfNewItem()'
    self._curField = None
    self._rewindStack()
    if self._curRecord:
        self._outObject.append(self._curRecord)
    outputDir = os.path.dirname(outputFilename)
    # dirname() is '' for a bare file name; os.makedirs('') would raise,
    # so only create a directory when there is one to create.
    if outputDir and not os.path.exists(outputDir):
        # Will also create intermediate directories if needed
        os.makedirs(outputDir)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and dump the collected records as JSON.

    The module-level ``date`` is set from group 1 of a line matching
    ``INTEGRATION_REFERENCES_LIST``.  ``self._DBAComments`` and
    ``self._generalDescription`` flag free text fields; while either is
    set, a ``START_OF_RECORD`` match does not start a new item.  After the
    last line the pending record (if any) is appended to
    ``self._outObject`` and the list is written to *outputFilename*.

    :param inputFilename: path of the ICR text file to read.
    :param outputFilename: path of the JSON file to write.
    """
    global date
    with open(inputFilename, 'r') as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                date = match.group(1).strip()
                continue
            match = START_OF_RECORD.match(line)
            if match and not self._DBAComments and not self._generalDescription:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                match = DBA_COMMENTS.match(line)
                if match:
                    self._DBAComments = True
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if fieldName == 'DBA Comments':
                    self._DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    self._generalDescription = True
                if self._DBAComments:
                    # NOTE(review): fieldName is always in ICR_FILE_KEYWORDS
                    # here (outer condition), so this always clears the
                    # flag -- confirm this is intended.
                    if fieldName in ICR_FILE_KEYWORDS:
                        self._DBAComments = False
                elif self._generalDescription:
                    # Starts with exactly 2 spaces
                    # NOTE(review): the literal below shows one leading
                    # space -- confirm against the comment above.
                    if line.startswith(" STATUS:"):
                        self._generalDescription = False
                if self._DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif self._generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    logger.debug('field name is: %s', fieldName)
                    logger.debug('cur field is: %s', self._curField)
                    # Inside a word-processing field the helper decides
                    # whether the line is kept as text instead of starting
                    # a new field.
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning('Ignore blank line for current field: [%s]',
                                   self._curField)
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Single parenthesised argument: same output under the
                    # Python 2 print statement and the Python 3 function.
                    print('No field associated with line %s: %s ' % (
                        self._curLineNo, line))
    logger.info('End of file now')
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info('Add last record: %s', self._curRecord)
        self._outObject.append(self._curRecord)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and dump the collected records as JSON.

    The module-level ``date`` is set from group 1 of a line matching
    ``INTEGRATION_REFERENCES_LIST``.  ``self._DBAComments`` and
    ``self._generalDescription`` flag free text fields; while either is
    set, a ``START_OF_RECORD`` match does not start a new item.  After the
    last line the pending record (if any) is appended to
    ``self._outObject`` and the list is written to *outputFilename*.

    :param inputFilename: path of the ICR text file to read.
    :param outputFilename: path of the JSON file to write.
    """
    global date
    with open(inputFilename, 'r') as ICRFile:
        for line in ICRFile:
            line = line.rstrip("\r\n")
            self._curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                date = match.group(1).strip()
                continue
            match = START_OF_RECORD.match(line)
            if match and not self._DBAComments and not self._generalDescription:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                match = DBA_COMMENTS.match(line)
                if match:
                    self._DBAComments = True
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if fieldName == 'DBA Comments':
                    self._DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    self._generalDescription = True
                if self._DBAComments:
                    # NOTE(review): fieldName is always in ICR_FILE_KEYWORDS
                    # here (outer condition), so this always clears the
                    # flag -- confirm this is intended.
                    if fieldName in ICR_FILE_KEYWORDS:
                        self._DBAComments = False
                elif self._generalDescription:
                    # Starts with exactly 2 spaces
                    # NOTE(review): the literal below shows one leading
                    # space -- confirm against the comment above.
                    if line.startswith(" STATUS:"):
                        self._generalDescription = False
                if self._DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif self._generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line, self._curRecord)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    logger.debug('field name is: %s', fieldName)
                    logger.debug('cur field is: %s', self._curField)
                    # Inside a word-processing field the helper decides
                    # whether the line is kept as text instead of starting
                    # a new field.
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning('Ignore blank line for current field: [%s]',
                                   self._curField)
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Single parenthesised argument: same output under the
                    # Python 2 print statement and the Python 3 function.
                    print('No field associated with line %s: %s ' % (
                        self._curLineNo, line))
    logger.info('End of file now')
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        logger.info('Add last record: %s', self._curRecord)
        self._outObject.append(self._curRecord)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    Two local flags (``DBAComments``, ``generalDescription``) mark free
    text fields; while either is set, a ``START_OF_RECORD`` match does not
    start a new item.  After the last line the pending record (if any) is
    appended to ``self._outObject``; the directory of *outputFilename* is
    created when missing and the list is dumped there as JSON.

    :param inputFilename: path of the ICR text file to read.
    :param outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, 'r') as ICRFile:
        curLineNo = 0
        DBAComments = False
        generalDescription = False
        for line in ICRFile:
            line = line.rstrip("\r\n")
            curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                # Skip this line. Use getDate() to parse date
                continue
            match = START_OF_RECORD.match(line)
            if match and not DBAComments and not generalDescription:
                self._startOfNewItem(match, line)
                continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                match = DBA_COMMENTS.match(line)
                if match:
                    DBAComments = True
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                if fieldName == 'DBA Comments':
                    DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    generalDescription = True
                if DBAComments:
                    # NOTE(review): fieldName is always in ICR_FILE_KEYWORDS
                    # here (outer condition), so this always clears the
                    # flag -- confirm this is intended.
                    if fieldName in ICR_FILE_KEYWORDS:
                        DBAComments = False
                elif generalDescription:
                    # Starts with exactly 2 spaces
                    # NOTE(review): the literal below shows one leading
                    # space -- confirm against the comment above.
                    if line.startswith(" STATUS:"):
                        generalDescription = False
                if DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line, self._curRecord)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    # Inside a word-processing field the helper decides
                    # whether the line is kept as text instead of starting
                    # a new field.
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(
                                fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line, self._curRecord)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(
                        self._curField):
                    # Ignore blank line
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Lazy %-args: same message, formatted only if emitted
                    logger.debug('No field associated with line %s: %s ',
                                 curLineNo, line)
    if len(self._curStack) > 0:
        self._curField = None
        self._rewindStack()
    if self._curRecord:
        self._outObject.append(self._curRecord)
    outputDir = os.path.dirname(outputFilename)
    # dirname() is '' for a bare file name; os.makedirs('') would raise,
    # so only create a directory when there is one to create.
    if outputDir and not os.path.exists(outputDir):
        # Will also create intermediate directories if needed
        os.makedirs(outputDir)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)
def parse(self, inputFilename, outputFilename):
    """Parse an ICR text file and write the collected records as JSON.

    Four local flags track whether the parser is inside a free text field
    (DBA Comments, General Description, Subscribing Details, Component
    Description).  While one is set, lines that look like fields are
    appended to that field until one of the end conditions coded below is
    met.  After the last line the pending record (if any) is appended to
    ``self._outObject``; the directory of *outputFilename* is created when
    missing and the list is dumped there as JSON.

    :param inputFilename: path of the ICR text file to read.
    :param outputFilename: path of the JSON file to write.
    """
    with open(inputFilename, 'r') as ICRFile:
        curLineNo = 0
        curNumber = None
        # Free text fields may contain field names and
        # need special parsing rules
        DBAComments = False
        generalDescription = False
        subscribingDetails = False
        componentDescription = False
        for line in ICRFile:
            line = line.rstrip("\r\n")
            curLineNo += 1
            # get rid of lines that are ignored
            if self.isIgnoredLine(line):
                continue
            match = INTEGRATION_REFERENCES_LIST.match(line)
            if match:
                # Skip this line. Use getDate() to parse date
                continue
            match = START_OF_RECORD.match(line)
            if match:
                name = match.group('name')
                number = match.group('number')
                skipField = False
                isFreeTextField = DBAComments or generalDescription or \
                    subscribingDetails or componentDescription
                if isFreeTextField:
                    # Check if the number matches what
                    # we're currently processing
                    skipField = number == curNumber
                if not skipField:
                    curNumber = number
                    DBAComments = False
                    generalDescription = False
                    subscribingDetails = False
                    componentDescription = False
                    self._startOfNewItem(name, number, match, line)
                    continue
            match = GENERIC_START_OF_RECORD.search(line)
            if not match:
                # DBA Comments doesn't match regex for other fields,
                # check separately. Even if we get a match here, can't
                # assume that we're in a DBA Comments field, might be in
                # a different free text field
                match = DBA_COMMENTS.match(line)
            if match and match.group('name') in ICR_FILE_KEYWORDS:
                fieldName = match.group('name')
                # First check if we are at the end of a free text field
                if DBAComments:
                    if fieldName in ['DATE/TIME EDITED', 'NUMBER', 'DATE ACTIVATED']:
                        DBAComments = False
                elif generalDescription:
                    # Starts with exactly 2 spaces
                    # NOTE(review): the literal below shows one leading
                    # space -- confirm against the comment above.
                    if line.startswith(" STATUS:") or fieldName == 'VIEWER':
                        generalDescription = False
                elif subscribingDetails:
                    # This assumes that 'Subscribing Details' may start
                    # with a field name or may contain 'GLOBAL REFERENCE'
                    # but won't contain any other field names in the middle
                    if fieldName in ICR_FILE_KEYWORDS and \
                            fieldName != 'GLOBAL REFERENCE' and \
                            'SUBSCRIBING DETAILS' in self._curRecord:
                        subscribingDetails = False
                elif componentDescription:
                    # At most one space before 'VARIABLES:'
                    if line.startswith("VARIABLES:") or \
                            line.startswith(" VARIABLES:") or \
                            fieldName in ['COMPONENT/ENTRY POINT', 'SUBSCRIBING PACKAGE']:
                        componentDescription = False

                # Are we at the beginning of a free text field?
                if DBAComments or generalDescription or \
                        subscribingDetails or componentDescription:
                    # Free text fields are never nested
                    pass
                elif fieldName == 'DBA Comments':
                    DBAComments = True
                elif fieldName == 'GENERAL DESCRIPTION':
                    generalDescription = True
                elif fieldName == 'SUBSCRIBING DETAILS':
                    subscribingDetails = True
                elif fieldName == 'COMPONENT DESCRIPTION':
                    componentDescription = True

                # Process line
                # Start with free text fields
                if DBAComments:
                    fieldName = 'DBA Comments'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        self._rewindStack()
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif generalDescription:
                    fieldName = 'GENERAL DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process general description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif subscribingDetails:
                    fieldName = 'SUBSCRIBING DETAILS'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        self._curField = fieldName
                        name = match.group('name')  # this is the name part
                        restOfLine = line[match.end():]
                        self._curRecord[name] = restOfLine.strip()
                elif componentDescription:
                    fieldName = 'COMPONENT DESCRIPTION'
                    if self._curField == fieldName:
                        self._appendWordsFieldLine(line)
                    else:
                        # Starting to process component description
                        self._curField = fieldName
                        self._rewindStack()
                        self._findKeyValueInLine(match, line)
                elif isSubFile(fieldName):
                    self._curField = fieldName
                    self._startOfSubFile(match, line)
                else:
                    # Check to see if fieldName is already in the out list
                    if isWordProcessingField(self._curField):
                        if self._ignoreKeywordInWordProcessingFields(fieldName):
                            self._appendWordsFieldLine(line)
                            continue
                    # figure out where to store the record
                    self._curField = fieldName
                    self._rewindStack()
                    self._findKeyValueInLine(match, line)
            elif self._curField and self._curField in self._curRecord:
                if not line.strip() and not isWordProcessingField(self._curField):
                    # Ignore blank line
                    continue
                self._appendWordsFieldLine(line)
            else:
                if self._curRecord:
                    if not line.strip():
                        continue
                    # Lazy %-args: same message, formatted only if emitted
                    logger.error('No field associated with line %s: %s ',
                                 curLineNo, line)
    # TODO: Copy + paste from '_startOfNewItem()'
    self._curField = None
    self._rewindStack()
    if self._curRecord:
        self._outObject.append(self._curRecord)
    outputDir = os.path.dirname(outputFilename)
    # dirname() is '' for a bare file name; os.makedirs('') would raise,
    # so only create a directory when there is one to create.
    if outputDir and not os.path.exists(outputDir):
        # Will also create intermediate directories if needed
        os.makedirs(outputDir)
    with open(outputFilename, 'w') as out_file:
        json.dump(self._outObject, out_file, indent=4)