def handleText(self, text, format=None, stx_level=None):
    """ Split raw text into headers and body.

    text      -- the raw document source.
    format    -- format name; when false, self.guessFormat(text) decides.
    stx_level -- when true and the text is not HTML, remembered on
                 self._stx_level.

    Returns a (headers, body, format) tuple.
    """
    format = format or self.guessFormat(text)

    if format == 'html':
        # Headers are the parser's meta tags; a document title, when
        # present, takes precedence as 'Title'.
        html_parser = SimpleHTMLParser()
        html_parser.feed(text)
        headers = {}
        headers.update(html_parser.metatags)
        if html_parser.title:
            headers['Title'] = html_parser.title
        return headers, bodyfinder(text), format

    # Anything else goes through the header/body splitter.
    headers, body = parseHeadersBody(text, {})
    if stx_level:
        self._stx_level = stx_level
    return headers, body, format
def edit(self, text_format, text, file=''):
    """ Edit the Document

    text_format -- 'html' forces HTML handling; HTML is also assumed
                   whenever bodyfinder matches the text.
    text        -- the new document source.
    file        -- optional object with a read() method; when it yields
                   non-empty content, that content replaces `text`.
                   A string value is ignored.

    Stores the resulting body on self.text and the detected format on
    self.text_format, then passes the collected headers to
    self.editMetadata() and calls self._parse().

    Raises KeyError when a header required by editMetadata() is supplied
    neither by the document nor by self.getMetadataHeaders().
    """
    self.text = text
    headers = {}
    if file and not isinstance(file, str):
        contents = file.read()
        if contents:
            text = self.text = contents

    # Now parse out HTML if it's applicable, or the plain text,
    # getting any headers passed along in the document
    bodyfound = bodyfinder.search(text)
    ishtml = (text_format == 'html') or (bodyfound is not None)
    if ishtml:
        parser = SimpleHTMLParser()
        parser.feed(text)
        headers.update(parser.metatags)
        if parser.title:
            headers['Title'] = parser.title
        if bodyfound:
            # Keep only the body content, not the surrounding markup.
            text = self.text = bodyfound.group('bodycontent')
        self.text_format = 'html'
    else:
        headers, text = parseHeadersBody(text, headers)
        self.text_format = 'structured-text'
        self.text = text

    # Format always reflects the object's own state; every other header
    # missing from the document falls back to the current metadata.
    headers['Format'] = self.Format()
    for key, value in self.getMetadataHeaders():
        if key != 'Format' and key not in headers:
            headers[key] = value

    self.editMetadata(
        title=headers['Title'],
        subject=headers['Subject'],
        description=headers['Description'],
        contributors=headers['Contributors'],
        effective_date=headers['Effective_date'],
        expiration_date=headers['Expiration_date'],
        format=headers['Format'],
        language=headers['Language'],
        rights=headers['Rights'],
    )
    self._parse()
def edit(self, text_format, text, file=''):
    """ Edit the Document

    text_format -- 'html' forces HTML handling; HTML is also assumed
                   whenever bodyfinder matches the text.
    text        -- the new document source.
    file        -- optional object with a read() method; when it yields
                   non-empty content, that content replaces `text`.
                   A string value is ignored.

    Stores the resulting body on self.text and the detected format on
    self.text_format, then passes the collected headers to
    self.editMetadata() and calls self._parse().

    Raises KeyError when a header required by editMetadata() is supplied
    neither by the document nor by self.getMetadataHeaders().
    """
    self.text = text
    headers = {}
    if file and not isinstance(file, str):
        contents = file.read()
        if contents:
            text = self.text = contents

    # Now parse out HTML if it's applicable, or the plain text,
    # getting any headers passed along in the document
    bodyfound = bodyfinder.search(text)
    ishtml = (text_format == 'html') or (bodyfound is not None)
    if ishtml:
        parser = SimpleHTMLParser()
        parser.feed(text)
        headers.update(parser.metatags)
        if parser.title:
            headers['Title'] = parser.title
        if bodyfound:
            # Keep only the body content, not the surrounding markup.
            text = self.text = bodyfound.group('bodycontent')
        self.text_format = 'html'
    else:
        headers, text = parseHeadersBody(text, headers)
        self.text_format = 'structured-text'
        self.text = text

    # Format always reflects the object's own state; every other header
    # missing from the document falls back to the current metadata.
    headers['Format'] = self.Format()
    for key, value in self.getMetadataHeaders():
        if key != 'Format' and key not in headers:
            headers[key] = value

    self.editMetadata(
        title=headers['Title'],
        subject=headers['Subject'],
        description=headers['Description'],
        contributors=headers['Contributors'],
        effective_date=headers['Effective_date'],
        expiration_date=headers['Expiration_date'],
        format=headers['Format'],
        language=headers['Language'],
        rights=headers['Rights'],
    )
    self._parse()
def handleText(self, text, format=None):
    """ Split raw text into headers, body and cooked body.

    text   -- the raw document source.
    format -- format name; when false, self.guessFormat(text) decides.

    Returns a (headers, body, cooked, format) tuple.  For HTML the
    cooked value is the body itself; otherwise it is the body run
    through _format_stx().
    """
    format = format or self.guessFormat(text)

    if format == 'html':
        html_parser = SimpleHTMLParser()
        html_parser.feed(text)
        headers = {}
        headers.update(html_parser.metatags)
        if html_parser.title:
            headers['Title'] = html_parser.title
        # When no body section is found the whole text serves as body.
        body = text
        match = bodyfinder.search(text)
        if match:
            body = match.group('bodycontent')
        return headers, body, body, format

    headers, body = parseHeadersBody(text, {})
    return headers, body, _format_stx(text=body), format
def handleText(self, text, format=None, stx_level=None):
    """ Split raw text into headers, body and cooked body.

    text      -- the raw document source.
    format    -- format name; when false, self.guessFormat(text) decides.
    stx_level -- level handed to _format_stx(); when false, the value
                 already on self._stx_level is used.

    Returns a (headers, body, cooked, format) tuple.  For HTML the
    cooked value is the body itself; otherwise it is the body run
    through _format_stx(), and the level used is remembered on
    self._stx_level.
    """
    level = stx_level or self._stx_level
    format = format or self.guessFormat(text)

    if format == 'html':
        html_parser = SimpleHTMLParser()
        html_parser.feed(text)
        headers = {}
        headers.update(html_parser.metatags)
        if html_parser.title:
            headers['Title'] = html_parser.title
        # When bodyfinder yields nothing the whole text serves as body.
        body = bodyfinder(text) or text
        return headers, body, body, format

    headers, body = parseHeadersBody(text, {})
    cooked = _format_stx(text=body, level=level)
    self._stx_level = level
    return headers, body, cooked, format