def test_analyser(self):
    """Fit the analyser on every sample text and verify its public attributes."""
    analyser = TextAnalyser(related=False)
    self.assertIsInstance(analyser, TextAnalyser)
    for _, text in load_texts():
        try:
            analyser.fit(text)
            self.assertTrue(hasattr(analyser, 'textrank_'))
            self.assertIsInstance(analyser.textrank_, TextRank)
            self.assertTrue(hasattr(analyser, 'articles_'))
            output = analyser.to_dict()
            self.assertIs(type(output), dict)
            self.assertIn('articles', output)
            self.assertIn('graph', output)
            keywords = analyser.textrank_.get_keywords(max_kws=10)
            self.assertIs(type(keywords), list)
            self.assertTrue(all(type(kw) is dict for kw in keywords))
            logger.debug(str(keywords))
        except NLPModelNotFound as e:
            logger.error(e)
def test_ui_under_pressure(self):
    """Hammer the UI endpoint from several daemon worker threads."""
    logger.debug("Start hammering the server")
    queue = Queue()
    csrf_token = self._get_csrf_token()

    def threader():
        # Each worker keeps posting texts from the queue until the process exits.
        while True:
            text = queue.get()
            self._test_post_request(text, csrf_token)
            sleep(0.5)
            queue.task_done()

    for _ in range(self.N_WORKERS):
        t = Thread(target=threader)
        t.daemon = True  # let the process exit without joining the workers
        t.start()

    texts = list(load_texts("articles.txt"))
    for _ in range(self.MAX_REQUESTS):
        _, text = random.choice(texts)
        queue.put(text)
    queue.join()
def test_ui(self):
    logger.debug(f"Start ui test @ {UI_LOCATION}")
    csrf_token = self._get_csrf_token()
    self.assertTrue(csrf_token)
    for _, text in load_texts():
        self._test_post_request(text, csrf_token)
def test_spacy():
    """Print the noun chunks spaCy finds in each sample text."""
    for _, text in load_texts("texts.txt"):
        document = nlp(text)
        for sentence in document.sents:
            for chunk in sentence.noun_chunks:
                print(chunk.text, chunk.label_, chunk.ent_id_)
def test_summa():
    for _, text in load_texts():
        try:
            _test_summa(text)
        except ValueError as e:
            if "No keyword found" in str(e):
                print(f"No keyword found for: {text}")
            else:
                # Re-raise with the original traceback intact.
                raise
def test_textrank():
    """Run TextRank on the Dutch sample texts and print the top keywords."""
    for _, text in load_texts("dutch_texts.txt"):
        document = nlp(text)
        tokens = map(attrgetter('text'), document)
        lemmas = map(lambda token: token.lemma_.lower(), document)
        pos_tags = map(attrgetter('pos_'), document)
        remove_stopwords = TextAnalyser.remove_stopwords(nlp, itemgetter(0))
        features = list(filter(remove_stopwords, zip(tokens, lemmas, pos_tags)))
        textrank = TextRank().fit(features, document.sents)
        ranks = textrank.get_keywords(5)
        print(ranks)
def test_under_pressure(self):
    """Fire requests at the endpoint from several daemon worker threads."""
    queue = Queue()

    def threader():
        while True:
            params = queue.get()
            self._make_request(params)
            queue.task_done()

    for _ in range(self.N_WORKERS):
        t = Thread(target=threader)
        t.daemon = True
        t.start()

    for _, text in load_texts():
        # Enqueue a fresh copy per request: a single shared dict would be
        # mutated while worker threads are still reading it.
        params = self.DEFAULT_PARAMS.copy()
        params['input_text'] = text
        queue.put(params)
    queue.join()
def test_endpoint(self):
    params = self.DEFAULT_PARAMS.copy()
    for _, text in load_texts():
        params['input_text'] = text
        self._test_request(params)