Example #1
    def test_analyser(self):
        analyser = TextAnalyser(related=False)
        self.assertIsInstance(analyser, TextAnalyser)

        for _, text in load_texts():
            try:
                # Fitting should expose the trained model and the collected
                # articles as trailing-underscore attributes.
                analyser.fit(text)
                self.assertTrue(hasattr(analyser, 'textrank_'))
                self.assertIsInstance(analyser.textrank_, TextRank)
                self.assertTrue(hasattr(analyser, 'articles_'))

                # The serialised output must be a plain dict holding the
                # keys downstream consumers read.
                output = analyser.to_dict()
                self.assertIs(type(output), dict)
                self.assertIn('articles', output)
                self.assertIn('graph', output)

                # Keywords come back as a list of plain dicts.
                keywords = analyser.textrank_.get_keywords(max_kws=10)
                self.assertIs(type(keywords), list)
                self.assertTrue(all(type(kw) is dict for kw in keywords))
                logger.debug(str(keywords))

            except NLPModelNotFound as e:
                logger.error(e)
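All of the examples on this page iterate over load_texts(), a helper that is not shown. From its call sites it yields (title, text) pairs, optionally reading a named fixture file; a minimal sketch under those assumptions (the file layout and field order are guesses, not the project's actual helper):

from pathlib import Path

def load_texts(filename="texts.txt"):
    # Hypothetical fixture loader: yield (title, text) pairs, one per
    # line, assuming a tab-separated layout. The real helper may differ.
    for line in Path(filename).read_text(encoding="utf-8").splitlines():
        title, _, text = line.partition("\t")
        yield title, text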
Example #2
    def test_ui_under_pressure(self):
        logger.debug("Start hammering the server")
        queue = Queue()
        csrf_token = self._get_csrf_token()

        def threader():
            # Worker: keep pulling texts off the queue and posting them,
            # pausing briefly between requests.
            while True:
                text = queue.get()
                self._test_post_request(text, csrf_token)
                sleep(0.5)
                queue.task_done()

        for _ in range(self.N_WORKERS):
            t = Thread(target=threader)
            t.daemon = True
            t.start()

        texts = list(load_texts("articles.txt"))

        # Enqueue MAX_REQUESTS randomly chosen texts.
        for _ in range(self.MAX_REQUESTS):
            _, text = random.choice(texts)
            queue.put(text)

        queue.join()
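Note the pattern: queue.join() blocks until every queued text has been matched by a task_done() call, and because the workers are daemon threads, their endless loops do not keep the process alive once the test returns.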
Example #3
    def test_ui(self):
        logger.debug(f"Start ui test @ {UI_LOCATION}")
        csrf_token = self._get_csrf_token()
        self.assertTrue(csrf_token)

        for _, text in load_texts():
            self._test_post_request(text, csrf_token)
Example #4
def test_spacy():
    for _, text in load_texts("texts.txt"):
        document = nlp(text)
        # Print every noun chunk with its span label and entity id.
        for sentence in document.sents:
            for chunk in sentence.noun_chunks:
                print(chunk.text, chunk.label_, chunk.ent_id_)
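Examples #4 and #6 call a module-level nlp pipeline that the snippets do not define. Presumably it is created once at import time; a minimal sketch (the model name is an assumption — Example #6 feeds it Dutch text):

import spacy

# Assumed module-level setup: one shared spaCy pipeline. The exact model
# is a guess; "nl_core_news_sm" would match the dutch_texts.txt fixture.
nlp = spacy.load("nl_core_news_sm")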
Example #5
def test_summa():
    for _, text in load_texts():
        try:
            _test_summa(text)
        except ValueError as e:
            # summa raises ValueError when it cannot extract any keyword;
            # swallow that case and re-raise everything else unchanged.
            if "No keyword found" in str(e):
                print(f"No keyword found for: {text}")
            else:
                raise
Example #6
def test_textrank():
    for _, text in load_texts("dutch_texts.txt"):
        document = nlp(text)
        # Build (token, lemma, POS) triples for every token.
        tokens = map(attrgetter('text'), document)
        lemmas = map(lambda token: token.lemma_.lower(), document)
        pos_tags = map(attrgetter('pos_'), document)
        # Drop stopwords, matching on the raw token text (item 0).
        remove_stopwords = TextAnalyser.remove_stopwords(nlp, itemgetter(0))
        features = list(filter(remove_stopwords, zip(tokens, lemmas,
                                                     pos_tags)))
        textrank = TextRank().fit(features, document.sents)
        ranks = textrank.get_keywords(5)
        print(ranks)
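TextAnalyser.remove_stopwords acts here as a predicate factory: given the spaCy pipeline and a key function, it returns a callable that filter() applies to each (token, lemma, pos) triple. Its implementation is not shown; a plausible sketch, assuming it checks the selected field against the vocabulary's stopword flag (this is a guess, not the project's code):

    @staticmethod
    def remove_stopwords(nlp, key):
        # Hypothetical sketch: keep a feature triple only when the field
        # selected by `key` is not a stopword in the pipeline's vocab.
        def predicate(feature):
            return not nlp.vocab[key(feature)].is_stop
        return predicate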
Example #7
    def test_under_pressure(self):
        queue = Queue()

        def threader():
            while True:
                params = queue.get()
                self._make_request(params)
                queue.task_done()

        for _ in range(self.N_WORKERS):
            t = Thread(target=threader)
            t.daemon = True
            t.start()

        for _, text in load_texts():
            # Enqueue a fresh copy per request; reusing one mutable dict
            # would let later iterations race with the worker threads.
            params = dict(self.DEFAULT_PARAMS, input_text=text)
            queue.put(params)

        queue.join()
Example #8
    def test_endpoint(self):
        params = self.DEFAULT_PARAMS.copy()
        for _, text in load_texts():
            params['input_text'] = text
            self._test_request(params)
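test_endpoint is the sequential counterpart of test_under_pressure above: each request finishes before the next text is written into params, so reusing a single dict is safe here, whereas the threaded variant needs a fresh copy per queued request.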