Python findHTMLMeta示例，openid.yadis.parsehtml.findHTMLMeta Python示例

示例#1

0

显示文件

文件： discover.py 项目： bpowell123/primeight-indris

def whereIsYadis(resp):
    """Work out where the Yadis document for an HTTP response lives.

    The answer is the URL that was just retrieved (when the response is
    itself served with the Yadis content type), a URL taken from the
    Yadis location header or from the HTML body, or a false value when
    none of those yield a location.

    [non-blocking]

    @returns: str or None
    """
    declared_type = resp.headers.get('content-type')

    # The spec asks for an exact content-type match, so only the media
    # type (everything before the first ';') is compared.
    if declared_type:
        media_type = declared_type.split(';', 1)[0].lower()
        if media_type == YADIS_CONTENT_TYPE:
            return resp.final_url

    # No direct hit: look for an indirection, header first.
    location = resp.headers.get(YADIS_HEADER_NAME.lower())
    if location:
        return location

    # Header missing, so parse the body as HTML.
    #
    # XXX: do we want to do something with content-type, like
    # have a whitelist or a blacklist (for detecting that it's
    # HTML)?
    try:
        return findHTMLMeta(StringIO(resp.body))
    except MetaNotFound:
        # Nothing in the body either; hand back the (false) header value.
        return location

示例#2

0

显示文件

文件： discover.py 项目： haldun/python-openid

def whereIsYadis(resp):
    """Return the location of the Yadis document for the given response.

    That is the response's own final URL when it is served with the
    Yadis content type; otherwise the value of the Yadis location header
    or, failing that, whatever findHTMLMeta extracts from the body. A
    false value comes back when no location is found.

    [non-blocking]

    @returns: str or None
    """
    header_value = resp.headers.get('content-type')

    # Per the spec the content-type must match exactly, so parameters
    # after the first ';' are dropped before comparing.
    media_type = None
    if header_value:
        media_type = header_value.split(';', 1)[0].lower()

    if media_type is not None and media_type == YADIS_CONTENT_TYPE:
        return resp.final_url

    # Indirection: the dedicated header takes precedence over the body.
    found = resp.headers.get(YADIS_HEADER_NAME.lower())
    if not found:
        # XXX: do we want to do something with content-type, like
        # have a whitelist or a blacklist (for detecting that it's
        # HTML)?
        html = StringIO(resp.body)
        try:
            found = findHTMLMeta(html)
        except MetaNotFound:
            pass

    return found

示例#3

0

显示文件

文件： discover.py 项目： Lennonka/python-openid

def whereIsYadis(resp):
    """Given a HTTPResponse, return the location of the Yadis document.

    May be the URL just retrieved, another URL, or None, if I can't
    find any.

    The response's own URL is returned when its content-type is the
    Yadis content type. Otherwise the Yadis location header is used,
    and if that is missing the body is decoded and handed to
    findHTMLMeta.

    [non-blocking]

    @param resp: response exposing C{headers}, C{final_url} and C{body}
    @returns: str or None
    """
    # Attempt to find out where to go to discover the document
    # or if we already have it
    content_type = resp.headers.get('content-type')

    # According to the spec, the content-type header must be an exact
    # match, or else we have to look for an indirection.
    if content_type and content_type.split(';',
                                           1)[0].lower() == YADIS_CONTENT_TYPE:
        return resp.final_url
    else:
        # Try the header
        yadis_loc = resp.headers.get(YADIS_HEADER_NAME.lower())

        if not yadis_loc:
            # Parse as HTML if the header is missing.
            #
            # XXX: do we want to do something with content-type, like
            # have a whitelist or a blacklist (for detecting that it's
            # HTML)?

            # Pick the charset from the last content-type parameter.
            # The label comes from the remote server, so tolerate
            # quoting, odd casing and empty values.
            encoding = 'UTF-8'
            params = (content_type or '').rsplit(';', 1)
            if len(params) == 2:
                name, sep, value = params[1].partition('=')
                if sep and name.strip().lower() == 'charset':
                    encoding = value.strip().strip('"\'') or 'UTF-8'

            body = resp.body
            if isinstance(body, bytes):
                try:
                    content = body.decode(encoding)
                except LookupError:
                    # The server announced a charset Python does not
                    # know; fall back to a lossy UTF-8 decode rather
                    # than crashing on a bad header.
                    content = body.decode('UTF-8', 'replace')
                except UnicodeError:
                    # The body does not match the announced charset.
                    # Decode lossily so the parser still receives text;
                    # the location may sit before the damaged part.
                    content = body.decode(encoding, 'replace')
            else:
                # Already text, nothing to decode.
                content = body

            try:
                yadis_loc = findHTMLMeta(StringIO(content))
            except (MetaNotFound, UnicodeError):
                # UnicodeError: the parser hit text it could not handle
                # before a location was found.
                pass

        return yadis_loc

示例#4

0

显示文件

文件： discover.py 项目： AniX/python-openid

def whereIsYadis(resp):
    """Given a HTTPResponse, return the location of the Yadis document.

    May be the URL just retrieved, another URL, or None, if I can't
    find any.

    Lookup order: the response's own content-type, then the Yadis
    location header, then the decoded body via findHTMLMeta.

    [non-blocking]

    @param resp: response exposing C{headers}, C{final_url} and C{body}
    @returns: str or None
    """
    # Attempt to find out where to go to discover the document
    # or if we already have it
    content_type = resp.headers.get('content-type')

    # According to the spec, the content-type header must be an exact
    # match, or else we have to look for an indirection.
    if (content_type and
        content_type.split(';', 1)[0].lower() == YADIS_CONTENT_TYPE):
        return resp.final_url
    else:
        # Try the header
        yadis_loc = resp.headers.get(YADIS_HEADER_NAME.lower())

        if not yadis_loc:
            # Parse as HTML if the header is missing.
            #
            # XXX: do we want to do something with content-type, like
            # have a whitelist or a blacklist (for detecting that it's
            # HTML)?

            # Decode body by the charset of the last content-type
            # parameter. The value is server-controlled, so quotes,
            # casing of the parameter name and empty values are handled.
            charset = 'UTF-8'
            pieces = (content_type or '').rsplit(';', 1)
            if len(pieces) == 2:
                key, eq, label = pieces[1].partition('=')
                if eq and key.strip().lower() == 'charset':
                    charset = label.strip().strip('"\'') or 'UTF-8'

            raw = resp.body
            if not isinstance(raw, bytes):
                # Body is already text.
                content = raw
            else:
                try:
                    content = raw.decode(charset)
                except LookupError:
                    # Unknown charset label: do not let a bogus header
                    # abort discovery, decode leniently as UTF-8.
                    content = raw.decode('UTF-8', 'replace')
                except UnicodeError:
                    # Bytes do not fit the announced charset. Replace
                    # the offending bytes so the parser still gets text
                    # and can find a location ahead of the damage.
                    content = raw.decode(charset, 'replace')

            try:
                yadis_loc = findHTMLMeta(StringIO(content))
            except (MetaNotFound, UnicodeError):
                # UnicodeError: the parser ran into trouble before a
                # location could be found.
                pass

        return yadis_loc

示例#5

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_multiple_headers(self):
     """With two X-XRDS-Location meta tags, expect the content of the first."""
     markup = (
         '<html><head>'
         '<meta http-equiv="X-XRDS-Location" content="found">'
         '<meta http-equiv="X-XRDS-Location" content="not-found">'
     )
     location = findHTMLMeta(StringIO(markup))
     self.assertEqual(location, 'found')

示例#6

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_javascript_in_head(self):
     """Expect 'found' from a META tag that follows a script writing "<body>"."""
     markup = (
         '<html><head><script type="text/javascript">document.write("<body>");</script>'
         '<META http-equiv="X-XRDS-Location" content="found">'
     )
     stream = StringIO(markup)
     self.assertEqual(findHTMLMeta(stream), 'found')

示例#7

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_missing_html_tag(self):
     """Expect 'found' even though the document has no <html> element."""
     markup = '<head><meta http-equiv="X-XRDS-Location" content="found">'
     location = findHTMLMeta(StringIO(markup))
     self.assertEqual(location, 'found')

示例#8

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_top_level_bogus(self):
     """Expect 'found' when a stray closing tag precedes <html>."""
     markup = '</porky><html><head><meta http-equiv="X-XRDS-Location" content="found">'
     location = findHTMLMeta(StringIO(markup))
     self.assertEqual(location, 'found')

示例#9

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_case_insensitive_header_name(self):
     """Expect 'found' when the http-equiv value is written in lower case."""
     markup = '<html><head><meta http-equiv="x-xrds-location" content="found"></head></html>'
     stream = StringIO(markup)
     self.assertEqual(findHTMLMeta(stream), 'found')

示例#10

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_missing_html_tag(self):
     """Expect 'found' for a document that starts directly with <head>."""
     document = StringIO(
         '<head><meta http-equiv="X-XRDS-Location" content="found">'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, 'found')

示例#11

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_empty_string(self):
     """Expect an empty string when the meta tag's content attribute is empty."""
     markup = '<head><meta http-equiv="X-XRDS-Location" content="">'
     location = findHTMLMeta(StringIO(markup))
     self.assertEqual(location, '')

示例#12

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_case_insensitive_header_name(self):
     """Expect 'found' for a lower-case "x-xrds-location" http-equiv value."""
     document = StringIO(
         '<html><head><meta http-equiv="x-xrds-location" content="found"></head></html>'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, 'found')

示例#13

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_decimal_entity(self):
     """Expect the decimal entity &#102; in the content to come back as 'f'."""
     document = StringIO(
         '<head><meta http-equiv="X-XRDS-Location" content="&#102;ound">'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, 'found')

示例#14

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_empty_string(self):
     """Expect '' when the content attribute of the meta tag is empty."""
     document = StringIO(
         '<head><meta http-equiv="X-XRDS-Location" content="">'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, '')

示例#15

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_standard_entity(self):
     """Expect the named entity &amp; in the content to come back as '&'."""
     document = StringIO(
         '<head><meta http-equiv="X-XRDS-Location" content="&amp;">'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, '&')

示例#16

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_multiple_headers(self):
     """Expect the first of two X-XRDS-Location meta tags to be returned."""
     document = StringIO(
         '<html><head>'
         '<meta http-equiv="X-XRDS-Location" content="found">'
         '<meta http-equiv="X-XRDS-Location" content="not-found">'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, 'found')

示例#17

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_javascript_in_head(self):
     """Expect 'found' from a META tag placed after a script that writes "<body>"."""
     document = StringIO(
         '<html><head><script type="text/javascript">document.write("<body>");</script>'
         '<META http-equiv="X-XRDS-Location" content="found">'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, 'found')

示例#18

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_standard_entity(self):
     """Expect '&' when the content attribute holds the entity &amp;."""
     markup = '<head><meta http-equiv="X-XRDS-Location" content="&amp;">'
     location = findHTMLMeta(StringIO(markup))
     self.assertEqual(location, '&')

示例#19

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_xhtml(self):
     """Expect 'found' from a self-closing (XHTML style) meta tag."""
     document = StringIO(
         '<html><head><meta http-equiv="X-XRDS-Location" content="found" /></head></html>'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, 'found')

示例#20

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_decimal_entity(self):
     """Expect 'found' when the leading 'f' is written as the entity &#102;."""
     markup = '<head><meta http-equiv="X-XRDS-Location" content="&#102;ound">'
     location = findHTMLMeta(StringIO(markup))
     self.assertEqual(location, 'found')

示例#21

0

显示文件

文件： test_parsehtml.py 项目： The-Actual-Damien/openid

 def test_xhtml(self):
     """Expect 'found' when the meta tag is closed XHTML style with ' />'."""
     markup = '<html><head><meta http-equiv="X-XRDS-Location" content="found" /></head></html>'
     location = findHTMLMeta(StringIO(markup))
     self.assertEqual(location, 'found')

示例#22

0

显示文件

文件： discover.py 项目： BeatrizFerreira/EP1DAS

def whereIsYadis(resp):
    """Given a HTTPResponse, return the location of the Yadis document.

    May be the URL just retrieved, another URL, or None if no suitable URL can
    be found.

    Lookup order: the response's own content-type, then the Yadis location
    header, then the decoded body via findHTMLMeta.

    [non-blocking]

    @param resp: response exposing C{headers}, C{final_url} and C{body}
    @returns: str or None
    """
    # Attempt to find out where to go to discover the document
    # or if we already have it
    content_type = resp.headers.get('content-type')

    # According to the spec, the content-type header must be an exact
    # match, or else we have to look for an indirection.
    if (content_type and
            content_type.split(';', 1)[0].lower() == YADIS_CONTENT_TYPE):
        return resp.final_url
    else:
        # Try the header
        yadis_loc = resp.headers.get(YADIS_HEADER_NAME.lower())

        if not yadis_loc:
            # Parse as HTML if the header is missing.
            #
            # XXX: do we want to do something with content-type, like
            # have a whitelist or a blacklist (for detecting that it's
            # HTML)?

            # Decode body by the charset named in the last content-type
            # parameter. The label is supplied by the remote server, so
            # accept quoted values and any casing of the parameter name,
            # and treat an empty value as "not given".
            encoding = 'utf-8'
            params = (content_type or '').rsplit(';', 1)
            if len(params) == 2:
                name, sep, value = params[1].partition('=')
                if sep and name.strip().lower() == 'charset':
                    encoding = value.strip().strip('"\'') or 'utf-8'

            body = resp.body
            if isinstance(body, bytes):
                try:
                    content = body.decode(encoding)
                except LookupError:
                    # The announced charset is not one Python knows. A
                    # bad header must not abort discovery, and retrying
                    # with the same label (even with 'replace') would
                    # fail the same way, so decode leniently as UTF-8.
                    content = body.decode('utf-8', 'replace')
                except UnicodeError:
                    # All right, the detected encoding has failed. Try with
                    # UTF-8 (even if there was no detected encoding and we've
                    # defaulted to UTF-8, it's not that expensive an operation)
                    try:
                        content = body.decode('utf-8')
                    except UnicodeError:
                        # At this point the content cannot be decoded to a str
                        # using the detected encoding or falling back to utf-8,
                        # so we have to resort to replacing undecodable chars.
                        # This *will* result in broken content but there isn't
                        # anything else that can be done.
                        content = body.decode(encoding, 'replace')
            else:
                # Body is already text.
                content = body

            try:
                yadis_loc = findHTMLMeta(StringIO(content))
            except (MetaNotFound, UnicodeError):
                # UnicodeError: Response body could not be encoded and xrds
                # location could not be found before troubles occur.
                pass

        return yadis_loc

示例#23

0

显示文件

文件： test_parsehtml.py 项目： ziima/python-openid

 def test_top_level_bogus(self):
     """Expect 'found' despite a bogus closing tag ahead of <html>."""
     document = StringIO(
         '</porky><html><head><meta http-equiv="X-XRDS-Location" content="found">'
     )
     result = findHTMLMeta(document)
     self.assertEqual(result, 'found')

示例#24

0

显示文件

文件： discover.py 项目： Ajay00000/Assignments

def whereIsYadis(resp):
    """Given a HTTPResponse, return the location of the Yadis document.

    May be the URL just retrieved, another URL, or None if no suitable URL can
    be found.

    The response's own URL is returned when its content-type is the Yadis
    content type. Otherwise the Yadis location header is used, and when that
    is missing the body is decoded and passed to findHTMLMeta.

    [non-blocking]

    @param resp: response exposing C{headers}, C{final_url} and C{body}
    @returns: str or None
    """
    # Attempt to find out where to go to discover the document
    # or if we already have it
    content_type = resp.headers.get('content-type')

    # According to the spec, the content-type header must be an exact
    # match, or else we have to look for an indirection.
    if (content_type
            and content_type.split(';', 1)[0].lower() == YADIS_CONTENT_TYPE):
        return resp.final_url
    else:
        # Try the header
        yadis_loc = resp.headers.get(YADIS_HEADER_NAME.lower())

        if not yadis_loc:
            # Parse as HTML if the header is missing.
            #
            # XXX: do we want to do something with content-type, like
            # have a whitelist or a blacklist (for detecting that it's
            # HTML)?

            # Decode body by encoding of file. The charset label comes
            # from the remote server: strip optional quotes, ignore the
            # casing of the parameter name, and keep the default when
            # the value is empty.
            charset = 'utf-8'
            pieces = (content_type or '').rsplit(';', 1)
            if len(pieces) == 2:
                key, eq, label = pieces[1].partition('=')
                if eq and key.strip().lower() == 'charset':
                    charset = label.strip().strip('"\'') or 'utf-8'

            raw = resp.body
            if not isinstance(raw, bytes):
                # Already text, nothing to decode.
                content = raw
            else:
                try:
                    content = raw.decode(charset)
                except LookupError:
                    # Python has no codec for the announced charset.
                    # Retrying with that label would raise again, so use
                    # a lenient UTF-8 decode instead of failing discovery.
                    content = raw.decode('utf-8', 'replace')
                except UnicodeError:
                    # All right, the detected encoding has failed. Try with
                    # UTF-8 (even if there was no detected encoding and we've
                    # defaulted to UTF-8, it's not that expensive an operation)
                    try:
                        content = raw.decode('utf-8')
                    except UnicodeError:
                        # At this point the content cannot be decoded to a str
                        # using the detected encoding or falling back to utf-8,
                        # so we have to resort to replacing undecodable chars.
                        # This *will* result in broken content but there isn't
                        # anything else that can be done.
                        content = raw.decode(charset, 'replace')

            try:
                yadis_loc = findHTMLMeta(StringIO(content))
            except (MetaNotFound, UnicodeError):
                # UnicodeError: Response body could not be encoded and xrds
                # location could not be found before troubles occur.
                pass

        return yadis_loc

示例#25

0

显示文件

文件： discover.py 项目： abtain/Heraldry

def discover(uri):
    """Fetch an identity URI and gather what is needed to read its XRDS.

    The URI is fetched with the Yadis Accept header. When the response
    is not itself served with the Yadis content type, a Yadis location
    is taken from the response header or from the HTML body and fetched
    in a second request. The text of the last response fetched is
    stored on the result either way, so a caller may still try to treat
    it as XRDS when no location was found.

    @param uri: The identity URI as a well-formed http or https
        URI. The well-formedness and the protocol are not checked, but
        the results of this function are undefined if those properties
        do not hold.

    @return: DiscoveryResult object

    @raises DiscoveryFailure: when the identity URL, or the Yadis
        location it points to, answers with a status other than 200.

    @raises Exception: Any exception that can be raised by fetching a URL with
        the given fetcher.
    """
    result = DiscoveryResult(uri)
    resp = fetchers.fetch(uri, headers={'Accept': YADIS_ACCEPT_HEADER})
    if resp.status != 200:
        raise DiscoveryFailure(
            'HTTP Response status from identity URL host is not 200. '
            'Got status %r' % (resp.status,), resp)

    # Record where we ended up after redirects, and what was served there.
    result.normalized_uri = resp.final_url
    result.content_type = resp.headers.get('content-type')

    # The spec wants an exact content-type match, so compare only the
    # media type in front of the first ';'.
    declared_type = result.content_type
    served_as_xrds = bool(declared_type) and (
        declared_type.split(';', 1)[0].lower() == YADIS_CONTENT_TYPE)

    if served_as_xrds:
        result.xrds_uri = result.normalized_uri
    else:
        # Look for an indirection: header first, then the HTML body.
        location = resp.headers.get(YADIS_HEADER_NAME.lower())
        if not location:
            # XXX: do we want to do something with content-type, like
            # have a whitelist or a blacklist (for detecting that it's
            # HTML)?
            try:
                location = findHTMLMeta(StringIO(resp.body))
            except MetaNotFound:
                pass

        if location:
            # Follow the indirection; from here on `resp` is the
            # response of the Yadis location, not of the identity URL.
            result.xrds_uri = location
            resp = fetchers.fetch(location)
            if resp.status != 200:
                failure = DiscoveryFailure(
                    'HTTP Response status from Yadis host is not 200. '
                    'Got status %r' % (resp.status,), resp)
                failure.identity_url = result.normalized_uri
                raise failure
            result.content_type = resp.headers.get('content-type')

    result.response_text = resp.body
    return result