示例#1
0
文件: html.py 项目: khokhlov/hmedia
    def test_decode_entities(self):
        """Check that decode_entities resolves each reference form for U+2192.

        The same character (RIGHTWARDS ARROW, code point 8594 == 0x2192) is
        written as a named, a decimal and a hexadecimal character reference;
        every form must decode to the single character ``six.unichr(8594)``.
        """
        # Named character reference.
        html = u'&rarr;'
        self.assertEqual(six.unichr(8594), decode_entities(html))

        # Decimal numeric character reference.
        html = u'&#8594;'
        self.assertEqual(six.unichr(8594), decode_entities(html))

        # Hexadecimal numeric character reference.
        html = u'&#x2192;'
        self.assertEqual(six.unichr(8594), decode_entities(html))
示例#2
0
文件: html.py 项目: abaelhe/weblib
    def test_decode_entities(self):
        """Check that decode_entities resolves each reference form for U+2192.

        The same character (RIGHTWARDS ARROW, code point 8594 == 0x2192) is
        written as a named, a decimal and a hexadecimal character reference;
        every form must decode to the single character ``six.unichr(8594)``.
        """
        # Named character reference.
        html = u'&rarr;'
        self.assertEqual(six.unichr(8594), decode_entities(html))

        # Decimal numeric character reference.
        html = u'&#8594;'
        self.assertEqual(six.unichr(8594), decode_entities(html))

        # Hexadecimal numeric character reference.
        html = u'&#x2192;'
        self.assertEqual(six.unichr(8594), decode_entities(html))
示例#3
0
文件: base.py 项目: khokhlov/hmedia
 def text(self, default=NULL):
     """Return group 1 of ``self.one()`` with entities decoded and spaces normalized.

     The captured text is passed through ``decode_entities`` and then
     ``normalize_space_func``.

     :param default: value returned when an ``AttributeError`` or
         ``IndexError`` occurs while producing the text; when left as
         ``NULL`` the failure is reported by raising instead.
     :raises DataNotFound: if the text cannot be produced and no default
         was supplied.
     """
     try:
         found = self.one()
         captured = found.group(1)
         return normalize_space_func(decode_entities(captured))
     except (AttributeError, IndexError):
         # A caller-supplied default takes precedence over raising.
         if default is not NULL:
             return default
         raise DataNotFound
示例#4
0
 def text(self, default=NULL):
     """Return group 1 of ``self.one()`` with entities decoded and spaces normalized.

     The captured text is passed through ``decode_entities`` and then
     ``normalize_space_func``.

     :param default: value returned when an ``AttributeError`` or
         ``IndexError`` occurs while producing the text; when left as
         ``NULL`` the failure is reported by raising instead.
     :raises DataNotFound: if the text cannot be produced and no default
         was supplied.
     """
     try:
         found = self.one()
         captured = found.group(1)
         return normalize_space_func(decode_entities(captured))
     except (AttributeError, IndexError):
         # A caller-supplied default takes precedence over raising.
         if default is not NULL:
             return default
         raise DataNotFound
示例#5
0
def rex_text_list(body, rex, flags=0):
    """
    Return the cleaned first group of every match of `rex` in `body`.

    Matches come from ``rex_list(body, rex, flags=flags)``; group 1 of each
    one is passed through ``decode_entities`` and then ``normalize_space``.
    The result is a list, in the order the matches were yielded.
    """

    return [
        normalize_space(decode_entities(found.group(1)))
        for found in rex_list(body, rex, flags=flags)
    ]
示例#6
0
文件: rex.py 项目: abael/weblib
def rex_text_list(body, rex, flags=0):
    """
    Return the cleaned first group of every match of `rex` in `body`.

    Matches come from ``rex_list(body, rex, flags=flags)``; group 1 of each
    one is passed through ``decode_entities`` and then ``normalize_space``.
    The result is a list, in the order the matches were yielded.
    """

    return [
        normalize_space(decode_entities(found.group(1)))
        for found in rex_list(body, rex, flags=flags)
    ]
示例#7
0
def rex_text(body, regexp, flags=0, default=NULL):
    """
    Return the cleaned first group of `regexp` found in `body`.

    The lookup is delegated to ``rex(body, regexp, flags=flags,
    default=default)``. Group 1 of its result is passed through
    ``decode_entities`` and then ``normalize_space``.

    :param default: returned when the lookup result raises
        ``AttributeError`` while being processed (for example when it is
        not a match object); when left as ``NULL`` an exception is raised
        instead.
    :raises DataNotFound: if no text could be extracted and no default
        was supplied.
    """

    found = rex(body, regexp, flags=flags, default=default)
    try:
        captured = found.group(1)
        cleaned = normalize_space(decode_entities(captured))
    except AttributeError:
        # A caller-supplied default takes precedence over raising.
        if default is not NULL:
            return default
        raise DataNotFound('Regexp not found')
    return cleaned
示例#8
0
文件: rex.py 项目: abael/weblib
def rex_text(body, regexp, flags=0, default=NULL):
    """
    Return the cleaned first group of `regexp` found in `body`.

    The lookup is delegated to ``rex(body, regexp, flags=flags,
    default=default)``. Group 1 of its result is passed through
    ``decode_entities`` and then ``normalize_space``.

    :param default: returned when the lookup result raises
        ``AttributeError`` while being processed (for example when it is
        not a match object); when left as ``NULL`` an exception is raised
        instead.
    :raises DataNotFound: if no text could be extracted and no default
        was supplied.
    """

    found = rex(body, regexp, flags=flags, default=default)
    try:
        captured = found.group(1)
        cleaned = normalize_space(decode_entities(captured))
    except AttributeError:
        # A caller-supplied default takes precedence over raising.
        if default is not NULL:
            return default
        raise DataNotFound('Regexp not found')
    return cleaned
示例#9
0
    def rex_text(self, regexp, flags=0, byte=False, default=NULL):
        """
        Find `regexp` via ``self.rex_search`` and return its first group,
        passed through ``decode_entities`` and then ``normalize_space``.

        :param byte: if False then search is performed in
        `response.unicode_body()` else the rex is searched in `response.body`.
        :param default: returned when ``self.rex_search`` raises
        ``DataNotFound``; when left as ``NULL`` a new ``DataNotFound`` is
        raised instead.
        """

        try:
            found = self.rex_search(regexp, flags=flags, byte=byte)
        except DataNotFound:
            # A caller-supplied default takes precedence over raising.
            if default is not NULL:
                return default
            raise DataNotFound('Regexp not found')
        captured = found.group(1)
        return normalize_space(decode_entities(captured))
示例#10
0
    def rex_text(self, regexp, flags=0, byte=False, default=NULL):
        """
        Find `regexp` via ``self.rex_search`` and return its first group,
        passed through ``decode_entities`` and then ``normalize_space``.

        :param byte: if False then search is performed in
        `response.unicode_body()` else the rex is searched in `response.body`.
        :param default: returned when ``self.rex_search`` raises
        ``DataNotFound``; when left as ``NULL`` a new ``DataNotFound`` is
        raised instead.
        """

        try:
            found = self.rex_search(regexp, flags=flags, byte=byte)
        except DataNotFound:
            # A caller-supplied default takes precedence over raising.
            if default is not NULL:
                return default
            raise DataNotFound('Regexp not found')
        captured = found.group(1)
        return normalize_space(decode_entities(captured))