Python extract_included_pagenames示例，links.extract_included_pagenames Python示例

示例#1

0

显示文件

    def forwards(self, orm):
        """Record, for every page, which other pages its content includes.

        Each name returned by ``extract_included_pagenames(page.content)`` is
        resolved to a page of the same region by slug.  A row already stored
        under that name (case-insensitive) is pointed at the resolved page;
        if no row exists for the name, a new ``IncludedPage`` is created.
        Names that do not resolve are stored with ``included_page=None``.

        :param orm: ORM object handed to the migration; models are reached
            through ``orm['pages.Page']`` and ``orm.IncludedPage``.
        """
        from pages.models import slugify
        from links import extract_included_pagenames

        for page in orm['pages.Page'].objects.all().iterator():
            region = page.region
            included_pages = extract_included_pagenames(page.content)
            # Parenthesised single argument prints the same text under the
            # Python 2 print statement and the Python 3 print function.
            # NOTE(review): smart_str is not imported in this method; it is
            # presumably imported at module level -- confirm.
            print("..recording included pages on %s" % smart_str(page.name))
            for pagename in included_pages:
                page_exists = orm['pages.Page'].objects.filter(
                    slug=slugify(pagename), region=region)
                if page_exists:
                    included_page = page_exists[0]
                else:
                    included_page = None

                # Skip only when a row already points at the *resolved* page.
                # Filtering on included_page=None matches every row of this
                # source whose target is NULL, which used to make the second
                # and later unresolved page names silently disappear.
                if included_page is not None and orm.IncludedPage.objects.filter(
                        source=page, included_page=included_page).exists():
                    continue

                recorded_by_name = orm.IncludedPage.objects.filter(
                    source=page,
                    included_page_name__iexact=pagename)
                if recorded_by_name.exists():
                    # The name is already recorded; attach the page if it
                    # can now be resolved, otherwise leave the row untouched.
                    if included_page:
                        included = recorded_by_name[0]
                        included.included_page = included_page
                        included.save()
                else:
                    included = orm.IncludedPage(
                        source=page,
                        region=region,
                        included_page=included_page,
                        included_page_name=pagename,
                    )
                    included.save()

示例#2

0

显示文件

文件： 0004_populate_included_pages.py 项目： lowiki-org/localwiki-backend-server

    def forwards(self, orm):
        """Populate ``IncludedPage`` from the include links found in pages.

        Iterates over all ``pages.Page`` rows, extracts the included page
        names from each page's content and makes sure one ``IncludedPage``
        row exists per (source page, included name).  The included page is
        looked up by slug within the source page's region and may be ``None``
        when no such page exists.

        :param orm: ORM object handed to the migration; provides
            ``orm['pages.Page']`` and ``orm.IncludedPage``.
        """
        from pages.models import slugify
        from links import extract_included_pagenames

        Page = orm['pages.Page']
        IncludedPage = orm.IncludedPage

        for page in Page.objects.all().iterator():
            region = page.region
            included_pages = extract_included_pagenames(page.content)
            # Written with parentheses so the line is valid on Python 3 and
            # still prints the identical text on Python 2.
            # NOTE(review): smart_str is assumed to come from a module-level
            # import that is not visible here -- confirm.
            print("..recording included pages on %s" % smart_str(page.name))
            for pagename in included_pages:
                page_exists = Page.objects.filter(
                    slug=slugify(pagename), region=region)
                included_page = page_exists[0] if page_exists else None

                if included_page is not None:
                    # A row for this exact target already exists: nothing to
                    # do.  The check is deliberately skipped for unresolved
                    # targets, because included_page=None would match *any*
                    # NULL-target row of this source and drop further
                    # unresolved names.
                    if IncludedPage.objects.filter(
                            source=page, included_page=included_page).exists():
                        continue

                same_name = IncludedPage.objects.filter(
                    source=page, included_page_name__iexact=pagename)
                if not same_name.exists():
                    IncludedPage(
                        source=page,
                        region=region,
                        included_page=included_page,
                        included_page_name=pagename,
                    ).save()
                elif included_page:
                    # Name already recorded: fill in the now-resolved target.
                    included = same_name[0]
                    included.included_page = included_page
                    included.save()

示例#3

0

显示文件

def record_page_includes(page):
    """Bring the ``IncludedPage`` rows of ``page`` in line with its content.

    For every page name extracted from ``page.content`` that has no row yet
    (matched on source, region and slug), a row is saved; its
    ``included_page`` is the same-region page with that slug, or ``None``
    when there is none.  Afterwards every row of this source and region
    whose slug is not among the extracted names is deleted.

    :param page: the source page; ``page.region`` and ``page.content`` are
        read.
    """
    region = page.region
    pagenames = extract_included_pagenames(page.content)

    for name in pagenames:
        already_recorded = IncludedPage.objects.filter(
            source=page, region=region, included_page_slug=slugify(name))
        if already_recorded:
            continue

        candidates = Page.objects.filter(slug=slugify(name), region=region)
        target = candidates[0] if candidates else None
        record = IncludedPage(
            source=page,
            region=region,
            included_page=target,
            included_page_name=name,
            included_page_slug=slugify(name),
        )
        record.save()

    # Drop rows for includes that are no longer present in the content.
    current_slugs = [slugify(name) for name in pagenames]
    stale_records = IncludedPage.objects.filter(source=page, region=region)
    stale_records = stale_records.exclude(included_page_slug__in=current_slugs)
    for stale in stale_records:
        stale.delete()

示例#4

0

显示文件

文件： signals.py 项目： lowiki-org/localwiki-backend-server

def record_page_includes(page):
    """Record the pages included by ``page`` and prune outdated records.

    Step 1 saves an ``IncludedPage`` for each extracted page name that is
    not yet stored for this source/region/slug.  Step 2 deletes the stored
    records of this source/region whose slug no longer appears among the
    extracted names.

    :param page: source page whose ``content`` is scanned and whose
        ``region`` scopes all lookups.
    """
    page_region = page.region
    extracted_names = extract_included_pagenames(page.content)

    # Step 1: add missing records.
    for extracted in extracted_names:
        stored = IncludedPage.objects.filter(
            source=page,
            region=page_region,
            included_page_slug=slugify(extracted),
        )
        if not stored:
            matching_pages = Page.objects.filter(
                slug=slugify(extracted), region=page_region)
            # Stays None when no page with that slug exists in the region.
            resolved = None
            if matching_pages:
                resolved = matching_pages[0]
            IncludedPage(
                source=page,
                region=page_region,
                included_page=resolved,
                included_page_name=extracted,
                included_page_slug=slugify(extracted),
            ).save()

    # Step 2: remove records whose include was taken out of the page.
    kept_slugs = [slugify(extracted) for extracted in extracted_names]
    outdated = IncludedPage.objects.filter(
        source=page, region=page_region,
    ).exclude(included_page_slug__in=kept_slugs)
    for entry in outdated:
        entry.delete()

示例#5

0

显示文件

    def test_ignore_other_links(self):
        html = """
<p>I love <a href="Parks">outside</a>.</p>
<p>I love <a href="http://example.org/Night">test</a>.</p>
        """
        included_pagenames = extract_included_pagenames(html)
        self.assertFalse('Parks' in included_pagenames)
        self.assertTrue(included_pagenames == [])

示例#6

0

显示文件

    def test_link_unquoting(self):
        html = """
<p>I love <a href="Cats%20and%20dogs" class="includepage plugin right"></a>.</p>
<p>I love <a href="Cats and dogs" class="plugin includepage left"></a>.</p>
        """
        included_pagenames = extract_included_pagenames(html)
        self.assertTrue('Cats and dogs' in included_pagenames)
        self.assertFalse('Cats%20and%20dogs' in included_pagenames)

示例#7

0

显示文件

    def test_ignore_anchors(self):
        html = """
<p>I love <a href="Parks" class="plugin includepage">outside</a>.</p>
<p>I love <a href="#gohere">test</a>.</p>
<p>I love <a>test now</a>.</p>
        """
        included_pagenames= extract_included_pagenames(html)
        self.assertTrue('Parks' in included_pagenames)
        self.assertEqual(len(included_pagenames), 1)

示例#8

0

显示文件

    def test_simple_extraction(self):
        html = """
<p>I love <a href="Parks" class="plugin includepage"></a>.</p>
        """
        included_pagenames = extract_included_pagenames(html)
        self.assertTrue('Parks' in included_pagenames)