class TextBlockParserTestCase(BaseOEmbedTestCase):
    """Exercise TextBlockParser: embed substitution, urlization and URL extraction."""

    def setUp(self):
        self.parser = TextBlockParser()
        super(TextBlockParserTestCase, self).setUp()

    def test_basic_handling(self):
        # A bare URL on its own becomes its embed markup.
        self.assertEqual(self.parser.parse(self.category_url),
                         self.category_embed)

    def test_inline_link_handling(self):
        result = self.parser.parse('Testing %s' % self.category_url)
        self.assertEqual(result, 'Testing %s' % self.category_embed)

    def test_block_handling(self):
        source = 'Testing %(url)s\n%(url)s' % {'url': self.category_url}
        expected = 'Testing %(embed)s\n%(embed)s' % {'embed': self.category_embed}
        self.assertEqual(self.parser.parse(source), expected)

    def test_urlization(self):
        text = 'Testing http://www.google.com'
        # Plain links are left untouched unless urlize_all_links is requested.
        self.assertEqual(self.parser.parse(text, urlize_all_links=False), text)
        self.assertEqual(
            self.parser.parse(text, urlize_all_links=True),
            'Testing <a href="http://www.google.com">http://www.google.com</a>')

    def test_extraction(self):
        found = self.parser.extract_urls('Testing %s wha?' % self.category_url)
        self.assertEqual(found, set([self.category_url]))
def extract_urls(self, text):
    """Collect the set of embeddable URLs found in *text*'s text nodes.

    Text nodes already inside an <a> tag are skipped so existing links
    are not re-embedded. Returns a set (unordered, deduplicated).
    """
    parser = TextBlockParser()
    soup = BeautifulSoup(text)
    pattern = re.compile(URL_RE)
    found = set()
    for node in soup.findAll(text=pattern):
        # Skip URLs that are already wrapped in an anchor element.
        if self.inside_a(node):
            continue
        found |= parser.extract_urls(unicode(node))
    return found
class TextBlockParserTestCase(BaseOEmbedTestCase):
    """Exercise TextBlockParser: embed substitution, urlization and ordered extraction."""

    def setUp(self):
        self.parser = TextBlockParser()
        super(TextBlockParserTestCase, self).setUp()

    def test_basic_handling(self):
        # A bare URL on its own becomes its embed markup.
        self.assertEqual(self.parser.parse(self.category_url),
                         self.category_embed)

    def test_inline_link_handling(self):
        result = self.parser.parse('Testing %s' % self.category_url)
        self.assertEqual(result, 'Testing %s' % self.category_embed)

    def test_block_handling(self):
        source = 'Testing %(url)s\n%(url)s' % {'url': self.category_url}
        expected = 'Testing %(embed)s\n%(embed)s' % {'embed': self.category_embed}
        self.assertEqual(self.parser.parse(source), expected)

    def test_urlization(self):
        text = 'Testing http://www.google.com'
        # Plain links are left untouched unless urlize_all_links is requested.
        self.assertEqual(self.parser.parse(text, urlize_all_links=False), text)
        self.assertEqual(
            self.parser.parse(text, urlize_all_links=True),
            'Testing <a href="http://www.google.com">http://www.google.com</a>')

    def test_extraction(self):
        found = self.parser.extract_urls('Testing %s wha?' % self.category_url)
        self.assertEqual(found, [self.category_url])

    def test_extraction_ordering(self):
        # Duplicates are dropped; first-seen order is preserved.
        found = self.parser.extract_urls(''' %s %s %s %s ''' % (
            self.category_url, self.blog_url, self.category_url, self.rich_url))
        self.assertEqual(found, [
            self.category_url,
            self.blog_url,
            self.rich_url,
        ])
def extract_urls(self, text):
    """Collect embeddable URLs from *text*'s text nodes in first-seen order.

    Text nodes already inside an <a> tag are skipped so existing links
    are not re-embedded. Returns a deduplicated list preserving the
    order in which URLs first appear.
    """
    parser = TextBlockParser()
    soup = BeautifulSoup(text)
    seen = set()      # membership test for O(1) dedup
    ordered = []      # result, in first-seen order
    for node in soup.findAll(text=re.compile(URL_RE)):
        # Skip URLs that are already wrapped in an anchor element.
        if self.inside_a(node):
            continue
        for url in parser.extract_urls(unicode(node)):
            if url in seen:
                continue
            seen.add(url)
            ordered.append(url)
    return ordered