Python Document.title示例

编程语言: Python

命名空间/包名称: documents.document

类/类型: Document

方法/功能: title

hotexamples.com的示例: 5

Python Document.title - 已找到5个示例。以下是从开源项目中提取的、评价最高的 documents.document.Document.title 真实 Python 代码示例。您可以为这些示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

title(5)

Document(5)

content(4)

download_date(1)

guid(1)

original_summary(1)

provider(1)

publish_date(1)

set_id(1)

set_tokens(1)

source_url(1)

words(1)

示例#1

显示文件

文件： clustering.py 项目： ruiaf/sumnews

 def test_very_similar_docs(self):
     """Check that two almost identically worded documents score above 0.3."""
     first, second = Document(), Document()

     first.title = 'russia invades ukraine'
     first.content = 'russia invades ukraine'
     second.title = 'russia invaded by ukraine'
     second.content = 'russia invaded ukraine'

     # The comparator is reached through the shared test state.
     comparator = self.state.repository.clustering.comparator
     score = comparator.similarity(first, second)

     logging.info('Similarity is %2.2f', score)
     self.assertGreater(score, 0.3)

示例#2

显示文件

文件： clustering.py 项目： ruiaf/sumnews

 def test_somewhat_different_docs(self):
     """Check that documents sharing only a few words score below 0.15."""
     first, second = Document(), Document()

     first.title = 'russia bought cars in portugal'
     first.content = 'russia went to portugal and bought 5 new cars'
     second.title = 'portugal is cool'
     second.content = 'portugal is very very cool'

     repository = self.state.repository
     score = repository.clustering.comparator.similarity(first, second)
     logging.info('Similarity is %2.2f', score)

     # Log the term weights of a few words to help diagnose a failure.
     logging.info('tf_idf of "is": %2.5f', repository.index.tf_idf('is'))
     logging.info('tf_idf of "portugal": %2.5f', repository.index.tf_idf('portugal'))
     logging.info('tf_idf of "russia": %2.5f', repository.index.tf_idf('russia'))

     self.assertLess(score, 0.15)

示例#3

显示文件

文件： clustering.py 项目： ruiaf/sumnews

 def test_very_different_docs(self):
     """Check that documents on unrelated topics score below 0.05."""
     first, second = Document(), Document()

     first.title = 'russia invades ukraine'
     first.content = 'russia is invading ukraine again'
     second.title = 'portugal is cool'
     second.content = 'portugal is very very cool'

     repository = self.state.repository
     score = repository.clustering.comparator.similarity(first, second)
     logging.info('Similarity is %2.2f', score)

     # Log the term weights of a few words to help diagnose a failure.
     logging.info('tf_idf of "is": %2.5f', repository.index.tf_idf('is'))
     logging.info('tf_idf of "portugal": %2.5f', repository.index.tf_idf('portugal'))
     logging.info('tf_idf of "ukraine": %2.5f', repository.index.tf_idf('ukraine'))

     self.assertLess(score, 0.05)

示例#4

显示文件

文件： clustering.py 项目： ruiaf/sumnews

    def test_similar_docs_complex(self):
        """Check that two real-world articles on the same story score above 0.15.

        The tf_idf of every shared word, and of every word found in only one
        of the two documents, is logged to help diagnose a failure.
        """
        first, second = Document(), Document()

        first.title = "Fred Phelps, Head Of Westboro Baptist Church, Dies"
        first.content = (
            "Fred Phelps, anti-gay activist and patriarch of the Westboro "
            "Baptist Church, has died at age 84. Frank Morris of KCUR reports "
            "on the interesting past of one of the most reviled men in America. "
        )
        second.title = "Westboro Baptist Church Says Leader Fred Phelps 'Has Gone The Way of All Flesh'"
        second.content = "The Bible-thumping, anti-gay preacher was known for picketing funerals."

        repository = self.state.repository
        score = repository.clustering.comparator.similarity(first, second)

        shared_words = first.words() & second.words()
        for term in shared_words:
            logging.info('tf_idf of "%s": %2.5f', term, repository.index.tf_idf(term))

        unshared_words = first.words() ^ second.words()
        for term in unshared_words:
            logging.info('tf_idf of non intersection "%s": %2.5f', term, repository.index.tf_idf(term))

        logging.info('Similarity is %2.2f', score)
        self.assertGreater(score, 0.15)

示例#5

显示文件

文件： feed.py 项目： ruiaf/sumnews

    def get_docs_from_xml(self, root):
        """Build Document objects for the not-yet-seen items of a parsed feed.

        Every child of ``root`` is scanned for ``item`` elements. Each item
        becomes a Document filled from its ``title``, ``pubDate``, ``link``,
        ``description`` and ``guid`` children. Missing or empty children are
        treated as empty strings instead of raising ``AttributeError``.

        Args:
            root: Parsed XML element whose children contain ``item`` elements
                (presumably the root of an RSS feed -- confirm against caller).

        Returns:
            A list of the Documents whose guid was not already present in
            ``self.processed_guids``.

        Side effects:
            Each returned document's guid is recorded in
            ``self.processed_guids`` and ``self.document_count`` is
            incremented once per returned document.
        """
        docs = []
        for channel in root:
            for item in channel.findall("item"):
                new_doc = Document()
                # findtext() yields None for a missing element, so a missing
                # or empty child falls back to "".
                new_doc.title = item.findtext("title") or ""

                new_doc.download_date = datetime.now(tz.tzutc())
                # Only parse when there is text to parse; otherwise (or when
                # the parser returns nothing) fall back to the download time.
                pub_text = item.findtext("pubDate")
                parsed_date = dateparser.parse(pub_text, "") if pub_text else None
                new_doc.publish_date = parsed_date or new_doc.download_date
                if new_doc.publish_date.tzinfo is None or self.force_timezone:
                    new_doc.publish_date = new_doc.publish_date.replace(tzinfo=self.timezone)
                new_doc.publish_date = new_doc.publish_date.astimezone(tz.tzutc())

                new_doc.source_url = item.findtext("link") or ""

                new_doc.original_summary = strip_html(item.findtext("description") or "")

                # An Element without children is falsy, so testing the element
                # itself would always skip a plain <guid>text</guid>. Test the
                # text instead, and hash the text rather than the Element.
                # The link is used when the item carries no guid text.
                guid_source = item.findtext("guid") or new_doc.source_url
                new_doc.guid = hashlib.md5(guid_source.encode('utf-8')).hexdigest()
                new_doc.provider = self.name

                if new_doc.guid not in self.processed_guids:
                    self.processed_guids[new_doc.guid] = True
                    self.document_count += 1
                    docs.append(new_doc)

        return docs