Пример #1
0
 def handleText(self, text, format=None, stx_level=None):
     """ Split raw text into headers and body.

     A falsy ``format`` is replaced by ``self.guessFormat(text)``.  For
     ``'html'`` the headers are the meta tags (plus a non-empty title,
     stored under ``'Title'``) collected by ``SimpleHTMLParser`` and the
     body is ``bodyfinder(text)``.  For every other format both come
     from ``parseHeadersBody``, and a truthy ``stx_level`` is stored on
     ``self._stx_level``.

     Returns the tuple ``(headers, body, format)``.
     """
     format = format or self.guessFormat(text)

     if format == 'html':
         html_parser = SimpleHTMLParser()
         html_parser.feed(text)
         headers = {}
         headers.update(html_parser.metatags)
         if html_parser.title:
             headers['Title'] = html_parser.title
         return headers, bodyfinder(text), format

     # Anything that is not HTML goes through the header/body splitter.
     headers, body = parseHeadersBody(text, {})
     if stx_level:
         self._stx_level = stx_level
     return headers, body, format
Пример #2
0
 def handleText(self, text, format=None, stx_level=None):
     """ Split raw text into headers and body.

     A falsy ``format`` is replaced by ``self.guessFormat(text)``.  For
     ``'html'`` the headers are the meta tags (plus a non-empty title,
     stored under ``'Title'``) collected by ``SimpleHTMLParser`` and the
     body is ``bodyfinder(text)``.  For every other format both come
     from ``parseHeadersBody``, and a truthy ``stx_level`` is stored on
     ``self._stx_level``.

     Returns the tuple ``(headers, body, format)``.
     """
     format = format or self.guessFormat(text)

     if format == 'html':
         html_parser = SimpleHTMLParser()
         html_parser.feed(text)
         headers = {}
         headers.update(html_parser.metatags)
         if html_parser.title:
             headers['Title'] = html_parser.title
         return headers, bodyfinder(text), format

     # Anything that is not HTML goes through the header/body splitter.
     headers, body = parseHeadersBody(text, {})
     if stx_level:
         self._stx_level = stx_level
     return headers, body, format
Пример #3
0
    def edit(self, text_format, text, file=''):
        """
        Edit the Document.

        Stores the new text, decides whether it is HTML or structured
        text, gathers metadata headers from the text, fills the gaps from
        ``self.getMetadataHeaders()``, hands the result to
        ``self.editMetadata`` and finally calls ``self._parse()``.

        text_format -- ``'html'`` forces HTML handling; any other value
            still results in HTML handling when ``bodyfinder`` matches
            the text.
        text -- the new raw text.
        file -- optional truthy, non-string object with a ``read()``
            method; non-empty contents read from it replace ``text``.
        """
        self.text = text
        headers = {}
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are also recognised as "not an upload".
        if file and not isinstance(file, str):
            contents = file.read()
            if contents:
                text = self.text = contents

        # Now parse out HTML if it is applicable, or the plain text,
        # getting any headers passed along in the document.
        bodyfound = bodyfinder.search(text)
        ishtml = (text_format == 'html') or (bodyfound is not None)
        if ishtml:
            parser = SimpleHTMLParser()
            parser.feed(text)
            headers.update(parser.metatags)
            if parser.title:
                headers['Title'] = parser.title
            if bodyfound:
                # Keep only what bodyfinder captured as 'bodycontent'.
                text = self.text = bodyfound.group('bodycontent')
            text_format = self.text_format = 'html'
        else:
            headers, text = parseHeadersBody(text, headers)
            text_format = self.text_format = 'structured-text'
            self.text = text

        # 'Format' always comes from self.Format(); every other header
        # found in the text wins over the value already on the object.
        headers['Format'] = self.Format()
        for key, value in self.getMetadataHeaders():
            # ``in`` replaces the deprecated dict.has_key().
            if key != 'Format' and key not in headers:
                headers[key] = value

        # Plain subscripting: each key below has to be present by now,
        # either from the text or from getMetadataHeaders().
        self.editMetadata(
            title=headers['Title'],
            subject=headers['Subject'],
            description=headers['Description'],
            contributors=headers['Contributors'],
            effective_date=headers['Effective_date'],
            expiration_date=headers['Expiration_date'],
            format=headers['Format'],
            language=headers['Language'],
            rights=headers['Rights'],
        )
        self._parse()
Пример #4
0
    def edit(self, text_format, text, file=''):
        """
        Edit the Document.

        Stores the new text, decides whether it is HTML or structured
        text, gathers metadata headers from the text, fills the gaps from
        ``self.getMetadataHeaders()``, hands the result to
        ``self.editMetadata`` and finally calls ``self._parse()``.

        text_format -- ``'html'`` forces HTML handling; any other value
            still results in HTML handling when ``bodyfinder`` matches
            the text.
        text -- the new raw text.
        file -- optional truthy, non-string object with a ``read()``
            method; non-empty contents read from it replace ``text``.
        """
        self.text = text
        headers = {}
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are also recognised as "not an upload".
        if file and not isinstance(file, str):
            contents = file.read()
            if contents:
                text = self.text = contents

        # Now parse out HTML if it is applicable, or the plain text,
        # getting any headers passed along in the document.
        bodyfound = bodyfinder.search(text)
        ishtml = (text_format == 'html') or (bodyfound is not None)
        if ishtml:
            parser = SimpleHTMLParser()
            parser.feed(text)
            headers.update(parser.metatags)
            if parser.title:
                headers['Title'] = parser.title
            if bodyfound:
                # Keep only what bodyfinder captured as 'bodycontent'.
                text = self.text = bodyfound.group('bodycontent')
            text_format = self.text_format = 'html'
        else:
            headers, text = parseHeadersBody(text, headers)
            text_format = self.text_format = 'structured-text'
            self.text = text

        # 'Format' always comes from self.Format(); every other header
        # found in the text wins over the value already on the object.
        headers['Format'] = self.Format()
        for key, value in self.getMetadataHeaders():
            # ``in`` replaces the deprecated dict.has_key().
            if key != 'Format' and key not in headers:
                headers[key] = value

        # Plain subscripting: each key below has to be present by now,
        # either from the text or from getMetadataHeaders().
        self.editMetadata(title=headers['Title'],
                          subject=headers['Subject'],
                          description=headers['Description'],
                          contributors=headers['Contributors'],
                          effective_date=headers['Effective_date'],
                          expiration_date=headers['Expiration_date'],
                          format=headers['Format'],
                          language=headers['Language'],
                          rights=headers['Rights'],
                          )
        self._parse()
Пример #5
0
    def handleText(self, text, format=None):
        """ Split raw text into headers, body and cooked text.

        A falsy ``format`` is replaced by ``self.guessFormat(text)``.
        For ``'html'`` the headers are the meta tags (plus a non-empty
        title under ``'Title'``) collected by ``SimpleHTMLParser``; body
        and cooked are both the ``'bodycontent'`` group of the
        ``bodyfinder`` match, or the unchanged text when nothing matches.
        For every other format ``parseHeadersBody`` supplies headers and
        body, and cooked is ``_format_stx(text=body)``.

        Returns the tuple ``(headers, body, cooked, format)``.
        """
        format = format or self.guessFormat(text)

        if format == 'html':
            html_parser = SimpleHTMLParser()
            html_parser.feed(text)
            headers = {}
            headers.update(html_parser.metatags)
            if html_parser.title:
                headers['Title'] = html_parser.title

            # Fall back to the full text when no body section is found.
            body = text
            match = bodyfinder.search(text)
            if match:
                body = match.group('bodycontent')
            return headers, body, body, format

        headers, body = parseHeadersBody(text, {})
        return headers, body, _format_stx(text=body), format
Пример #6
0
    def handleText(self, text, format=None):
        """ Split raw text into headers, body and cooked text.

        A falsy ``format`` is replaced by ``self.guessFormat(text)``.
        For ``'html'`` the headers are the meta tags (plus a non-empty
        title under ``'Title'``) collected by ``SimpleHTMLParser``; body
        and cooked are both the ``'bodycontent'`` group of the
        ``bodyfinder`` match, or the unchanged text when nothing matches.
        For every other format ``parseHeadersBody`` supplies headers and
        body, and cooked is ``_format_stx(text=body)``.

        Returns the tuple ``(headers, body, cooked, format)``.
        """
        format = format or self.guessFormat(text)

        if format == 'html':
            html_parser = SimpleHTMLParser()
            html_parser.feed(text)
            headers = {}
            headers.update(html_parser.metatags)
            if html_parser.title:
                headers['Title'] = html_parser.title

            # Fall back to the full text when no body section is found.
            body = text
            match = bodyfinder.search(text)
            if match:
                body = match.group('bodycontent')
            return headers, body, body, format

        headers, body = parseHeadersBody(text, {})
        return headers, body, _format_stx(text=body), format
Пример #7
0
    def handleText(self, text, format=None, stx_level=None):
        """ Split raw text into headers, body and cooked text.

        The level used is ``stx_level`` when truthy, otherwise
        ``self._stx_level``.  A falsy ``format`` is replaced by
        ``self.guessFormat(text)``.  For ``'html'`` the headers are the
        meta tags (plus a non-empty title under ``'Title'``) collected by
        ``SimpleHTMLParser``; body and cooked are both the truthy result
        of ``bodyfinder(text)``, or the unchanged text otherwise.  For
        every other format ``parseHeadersBody`` supplies headers and
        body, cooked is ``_format_stx(text=body, level=level)`` and the
        level is stored on ``self._stx_level``.

        Returns the tuple ``(headers, body, cooked, format)``.
        """
        # Resolved up front, before the format is guessed.
        level = stx_level or self._stx_level
        format = format or self.guessFormat(text)

        if format == 'html':
            html_parser = SimpleHTMLParser()
            html_parser.feed(text)
            headers = {}
            headers.update(html_parser.metatags)
            if html_parser.title:
                headers['Title'] = html_parser.title

            # Fall back to the full text when bodyfinder yields nothing.
            extracted = bodyfinder(text)
            body = extracted if extracted else text
            return headers, body, body, format

        headers, body = parseHeadersBody(text, {})
        cooked = _format_stx(text=body, level=level)
        self._stx_level = level
        return headers, body, cooked, format