Python Article.pagenr示例

编程语言: Python

命名空间/包名称: amcat.models.article

类/类型: Article

方法/功能: pagenr

hotexamples.com的示例: 4

Python Article.pagenr - 已找到 4 个示例。以下是从开源项目中提取的、评价最高的 amcat.models.article.Article.pagenr 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

Article(30)

headline(6)

text(6)

medium(5)

create_articles(4)

date(4)

metastring(3)

section(3)

set_property(3)

exists(2)

fromdict(2)

pagenr(2)

length(1)

author(1)

project(1)

title(1)

url(1)

示例#1

显示文件

    def getarticle(self, headline, lines):
        """Build an ``Article`` from a headline and the raw lines of one entry.

        ``lines[1]`` is searched for a ``dd-mm-yyyy`` date and for an optional
        page reference of the form ``(p.N)`` or ``(p.N-M)``; ``lines[2:]``
        supply the body text.

        Args:
            headline: Headline passed to the ``Article`` constructor.
            lines: Indexable sequence of text lines with at least two entries.

        Returns:
            The populated ``Article``.

        Raises:
            AttributeError: If ``lines[1]`` contains no ``dd-mm-yyyy`` date,
                because the failed regex match is ``None``.
        """
        article = Article(headline=headline)

        # Body lines of two characters or fewer are dropped; every kept line
        # is prefixed with a newline so the clean-up below can see line ends.
        text = "".join("\n" + line for line in lines[2:] if len(line) > 2)
        text = text.replace("-\n", "")  # drop a line-final hyphen with its break
        text = text.replace("  ", " ")
        text = text.replace("\n", " ")
        article.text = text

        # Raw strings: "\-" and "\(" are regex escapes, not valid string
        # escapes, and trigger a warning in a normal string literal.
        date_pattern = re.compile(r"([0-9]{2,2})\-([0-9]{2,2})\-([0-9]{4,4})")
        date_match = date_pattern.search(lines[1])
        # Groups are (day, month, year); ``date`` is called as
        # (year, month, day) -- presumably datetime.date, confirm at import.
        article.date = date(
            int(date_match.group(3)),
            int(date_match.group(2)),
            int(date_match.group(1)),
        )

        # NOTE: the "." after "p" is unescaped, so it matches any character.
        pagenum_pattern = re.compile(r"\(p.([0-9]+)([0-9\-]+)?\)")
        page_match = pagenum_pattern.search(lines[1])
        if page_match:
            # Only the first captured number is used as the page number.
            article.pagenr = int(page_match.group(1))

        # Each index entry whose headline text contains this article's
        # headline (case-insensitive, stripped) triggers set_property.
        for h, medium in self.index:
            if article.headline.lower().strip() in h.lower().strip():
                article.set_property("medium", self.get_medium(medium))

        return article

示例#2

显示文件

文件： bzk_pdf.py 项目： amcat/amcat

    def getarticle(self, headline, lines):
        """Create an ``Article`` for one entry of the parsed document.

        The second line (``lines[1]``) carries the metadata: a ``dd-mm-yyyy``
        date and, optionally, a page reference such as ``(p.3)`` or
        ``(p.3-4)``. All lines from index 2 onwards form the article text.

        Args:
            headline: Headline handed to the ``Article`` constructor.
            lines: Indexable sequence of text lines with at least two entries.

        Returns:
            The ``Article`` with text, date and (when found) page number set.

        Raises:
            AttributeError: If no ``dd-mm-yyyy`` date is found in ``lines[1]``
                (the regex search returns ``None``).
        """
        article = Article(headline=headline)

        # Keep only lines longer than two characters, each prefixed with a
        # newline, then normalise: remove "-\n", collapse one level of double
        # spaces, and turn the remaining newlines into spaces.
        kept = ["\n" + line for line in lines[2:] if len(line) > 2]
        text = "".join(kept)
        text = text.replace("-\n", "")
        text = text.replace("  ", " ")
        text = text.replace("\n", " ")
        article.text = text

        meta_line = lines[1]

        # Patterns are raw strings so that "\-" and "\(" reach the regex
        # engine without being treated as (invalid) string escapes.
        date_pattern = re.compile(r"([0-9]{2,2})\-([0-9]{2,2})\-([0-9]{4,4})")
        found_date = date_pattern.search(meta_line)
        day, month, year = (int(found_date.group(i)) for i in (1, 2, 3))
        # ``date`` is presumably datetime.date -- confirm against the imports.
        article.date = date(year, month, day)

        # NOTE: the character after "p" is an unescaped ".", i.e. any char.
        pagenum_pattern = re.compile(r"\(p.([0-9]+)([0-9\-]+)?\)")
        found_page = pagenum_pattern.search(meta_line)
        if found_page:
            # The second group (rest of a range) is ignored.
            article.pagenr = int(found_page.group(1))

        # set_property is called for every index entry whose headline text
        # contains the article headline (compared lower-cased and stripped).
        for h, medium in self.index:
            if article.headline.lower().strip() in h.lower().strip():
                article.set_property("medium", self.get_medium(medium))

        return article

示例#3

显示文件

文件： bzk_html.py 项目： pombredanne/amcat

    def scrape_1(self, _html, t):
        """Yield articles scraped from the page format used mostly in 2013.

        Args:
            _html: Parsed document exposing ``cssselect`` (presumably an lxml
                element -- confirm against the caller).
            t: Value tested for the markers ``"werkmap"`` and
                ``"intranet/rss"`` to decide which divs hold the articles.

        Yields:
            One ``Article`` per selected div whose date could be parsed.

        Raises:
            ValueError: If ``t`` contains neither marker. Because this is a
                generator, the error surfaces on the first iteration.
        """
        if "werkmap" in t:
            divs = _html.cssselect("#articleTable div")
        elif "intranet/rss" in t:
            divs = [
                div for div in _html.cssselect("#sort div")
                if "sort_" in div.get('id')
            ]
        else:
            # Without this branch ``divs`` would be unbound and the loop
            # below would fail with an opaque UnboundLocalError.
            raise ValueError("Neither 'werkmap' nor 'intranet/rss' in html.")

        for div in divs:
            article = Article(metastring={})
            # The source div itself is kept in the metastring dict.
            article.metastring['html'] = div
            article.headline = div.cssselect("#articleTitle")[0].text_content()
            # NOTE: the intro element itself (not its text) is stored here.
            article.text = div.cssselect("#articleIntro")[0]
            articlepage = div.cssselect("#articlePage")
            if articlepage:
                article.pagenr, article.section = self.get_pagenum(
                    articlepage[0].text)

            article.medium = self.get_medium(
                div.cssselect("#sourceTitle")[0].text)
            date_str = div.cssselect("#articleDate")[0].text
            try:
                article.date = readDate(date_str)
            except ValueError:
                # Articles with an unparseable date are logged and skipped.
                log.error(
                    "parsing date \"{date_str}\" failed".format(
                        date_str=date_str))
            else:
                yield article

示例#4

显示文件

文件： bzk_html.py 项目： BBie/amcat

    def scrape_1(self, _html, t):
        """Yield articles scraped from the page format used mostly in 2013.

        Args:
            _html: Parsed document exposing ``cssselect`` (presumably an lxml
                element -- confirm against the caller).
            t: Value tested for the markers ``"werkmap"`` and
                ``"intranet/rss"`` to decide which divs hold the articles.

        Yields:
            One ``Article`` per selected div whose date could be parsed.

        Raises:
            ValueError: If ``t`` contains neither marker. Because this is a
                generator, the error surfaces on the first iteration.
        """
        if "werkmap" in t:
            containers = _html.cssselect("#articleTable div")
        elif "intranet/rss" in t:
            # Keep only the divs whose id contains "sort_".
            containers = []
            for candidate in _html.cssselect("#sort div"):
                if "sort_" in candidate.get('id'):
                    containers.append(candidate)
        else:
            raise ValueError("Neither 'werkmap' nor 'intranet/rss' in html.")

        for container in containers:
            article = Article(metastring=container.text_content())

            title_node = container.cssselect("#articleTitle")[0]
            article.headline = title_node.text_content()

            intro_node = container.cssselect("#articleIntro")[0]
            article.text = intro_node.text_content()

            # Page information is optional; set page number and section only
            # when the element is present.
            page_nodes = container.cssselect("#articlePage")
            if page_nodes:
                page_info = self.get_pagenum(page_nodes[0].text_content())
                article.pagenr, article.section = page_info

            source_node = container.cssselect("#sourceTitle")[0]
            article.medium = self.get_medium(source_node.text_content())

            date_str = container.cssselect("#articleDate")[0].text_content()
            try:
                article.date = readDate(date_str)
            except ValueError:
                # Articles with an unparseable date are logged and skipped.
                log.error(
                    "parsing date \"{date_str}\" failed".format(
                        date_str=date_str))
                continue

            yield article