示例#1
0
    def test_entity_recognition(self):
        """
        Check that ``brain.entities`` finds the expected entities in the sample text.

        The comparison is made on sets, so neither the order of the results
        nor repeated entries affect the outcome.
        """
        # Decode explicitly instead of relying on the locale-dependent default
        # encoding of ``open``, so the test reads the same text on every platform.
        # NOTE(review): assumes the sample fixture is stored as UTF-8 -- confirm.
        with open("tests/data/sample.txt", "r", encoding="utf-8") as f:
            sample = f.read()
        results = brain.entities(sample)

        expected = {
            "Second Red Scare",
            "Bhagavad Gita",
            "Soviet Union",
            "Phillips",
            "John F. Kennedy",
            "Julius Robert Oppenheimer",
            "World War II",
            "Institute for Advanced Study in Princeton",
            "Trinity",
            "Oppenheimer",
            "New Mexico",
            "Lyndon B. Johnson",
            "United States Atomic Energy Commission",
            "University of California",
            "Manhattan Project",
            "Born",
            "Enrico Fermi",
            "American",
            "Berkeley",
            "Enrico Fermi Award",
        }

        self.assertEqual(set(results), expected)
示例#2
0
    def test_entity_recognition(self):
        """
        Verify the entities that ``brain.entities`` extracts from the sample text.

        Results and expectations are compared as sets: ordering and duplicate
        entries in the returned collection are ignored.
        """
        # An explicit encoding keeps the decoded text independent of the
        # platform's default locale encoding.
        # NOTE(review): assumes the sample fixture is stored as UTF-8 -- confirm.
        with open('tests/data/sample.txt', 'r', encoding='utf-8') as f:
            sample = f.read()
        results = brain.entities(sample)

        expected = {
            'Second Red Scare',
            'Bhagavad Gita',
            'Soviet Union',
            'Phillips',
            'John F. Kennedy',
            'Julius Robert Oppenheimer',
            'World War II',
            'Institute for Advanced Study in Princeton',
            'Trinity',
            'Oppenheimer',
            'New Mexico',
            'Lyndon B. Johnson',
            'United States Atomic Energy Commission',
            'University of California',
            'Manhattan Project',
            'Born',
            'Enrico Fermi',
            'American',
            'Berkeley',
            'Enrico Fermi Award',
        }

        self.assertEqual(set(results), expected)
示例#3
0
文件: article.py 项目: keho98/argos
    def vectorize(self):
        """
        Return the vectors representing this article, computing them on first use.

        The result is a two-element list:
            [bag of words vector, entities vector]

        It is stored on ``self.vectors`` and handed back unchanged by later calls.
        """
        # Already computed: reuse the stored list.
        if self.vectors is not None:
            return self.vectors

        # The bare name ``vectorize`` resolves outside the class, not to this method.
        text_vec = vectorize(self.text)

        # Entity names are joined into one space-separated string and
        # vectorized the same way as the full text.
        entity_text = ' '.join(entities(self.text))
        entity_vec = vectorize(entity_text)

        self.vectors = [text_vec, entity_vec]
        return self.vectors
示例#4
0
    def vectorize(self):
        """
        Build (once) and return this article's vector representation.

        The representation is a list of two vectors:
            [bag of words vector, entities vector]

        The list is cached on ``self.vectors``; it is only computed while
        that attribute is ``None``.
        """
        cached = self.vectors
        if cached is not None:
            # Nothing to do, the vectors were computed by an earlier call.
            return cached

        # Bag-of-words vector over the whole article text. The bare name
        # ``vectorize`` here is looked up outside the class, not on ``self``.
        words_vector = vectorize(self.text)

        # Entities vector: the extracted entity names, space-joined, go
        # through the same vectorizer.
        names_vector = vectorize(' '.join(entities(self.text)))

        self.vectors = [words_vector, names_vector]
        return self.vectors
示例#5
0
文件: article.py 项目: keho98/argos
    def entitize(self):
        """
        Extract the entities named in the article text and attach them.

        Each name returned by ``entities(self.text)`` is looked up via
        ``Entity.query.get`` using its slug; when the lookup yields nothing,
        a new ``Entity`` is created from the name, added to the session and
        committed. The resulting objects replace ``self.entities``.
        """
        resolved = []
        for name in entities(self.text):
            # TODO: Need to find a way of getting canonical name.

            # Existing entities are fetched by the slugified name.
            entity = Entity.query.get(slugify(name))

            # No match: persist a new entity immediately, one commit per
            # creation.
            # NOTE(review): presumably this lets a later lookup in the same
            # loop find it -- confirm.
            if not entity:
                entity = Entity(name)
                db.session.add(entity)
                db.session.commit()

            # NOTE(review): a name that repeats (or maps to the same slug)
            # appends the same entity again -- confirm this is intended.
            resolved.append(entity)
        self.entities = resolved
示例#6
0
    def entitize(self):
        """
        Populate ``self.entities`` from the entities found in the article text.

        For every name produced by ``entities(self.text)`` the matching
        ``Entity`` is fetched by slug, or created, added to the session and
        committed when the fetch returns nothing.
        """
        found = []
        for entity_name in entities(self.text):
            # TODO: Need to find a way of getting canonical name.

            # The slug of the name is the key passed to the lookup.
            key = slugify(entity_name)
            match = Entity.query.get(key)

            if match:
                found.append(match)
            else:
                # Unknown entity: create it from the raw name and commit
                # straight away, before moving on to the next name.
                created = Entity(entity_name)
                db.session.add(created)
                db.session.commit()
                found.append(created)
        self.entities = found