def test_tokenization(self):
    """
    Test whether the Elasticsearch analyzer yields the right tokens for the
    German analyzer. Check the comments in mainapp.documents.index for more details.
    """
    tokenizations = {
        "die": [],  # Stopword, dropped entirely
        "hunde": ["hunde", "hund"],
        "wi-fi": ["wi", "fi"],
        "Feuerwehr": ["feuerwehr"],  # Would ideally split the compound words
        "oktopoden": ["oktopoden", "oktopod"],
        "Äpfel": ["äpfel", "apfel"],
        "ging": ["ging"],
        "schwierigste": ["schwierigste", "schwierig"],
        "1234/89": ["1234", "89"],  # Would be better if it included "1234/89"
    }

    text_analyzer = get_text_analyzer("german")

    # Create a throwaway index; the analyzer can only be attached while the
    # index is closed, hence the close/save/open sequence below.
    elastic_index = Index("mst-test-tokenization")
    if not elastic_index.exists():
        elastic_index.create()
    elastic_index.close()
    elastic_index.analyzer(text_analyzer)
    elastic_index.save()
    elastic_index.open()
    elastic_index.flush()

    for word, expected_tokens in tokenizations.items():
        analysis = elastic_index.analyze(
            body={"analyzer": "text_analyzer", "text": word}
        )
        actual_tokens = [i["token"] for i in analysis["tokens"]]
        self.assertEqual(
            expected_tokens, actual_tokens, "Word was {}".format(word)
        )
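# For reference, a minimal sketch of what get_text_analyzer("german") might
# build. This is an assumption for illustration only: the real definition
# lives in mainapp.documents.index, and the filter names and their order here
# are hypothetical, chosen so that the expectations above would hold (a stop
# filter drops "die", keyword_repeat keeps the unstemmed token next to the
# stemmed one, and remove_duplicates collapses tokens the stemmer left
# unchanged).
from elasticsearch_dsl import analyzer, token_filter


def get_text_analyzer_sketch(language):
    return analyzer(
        "text_analyzer",
        tokenizer="standard",
        filter=[
            "lowercase",
            # Drop stopwords such as "die"
            token_filter(
                language + "_stop", type="stop", stopwords="_" + language + "_"
            ),
            # Emit each token twice: once verbatim, once for the stemmer
            "keyword_repeat",
            # Light stemming, e.g. "hunde" -> "hund"
            token_filter(
                language + "_stemmer", type="stemmer", language="light_" + language
            ),
            # Collapse the duplicate when stemming did not change the token
            "remove_duplicates",
        ],
    )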
from elasticsearch_dsl import Index
from rest_framework.decorators import api_view
from rest_framework.response import Response


@api_view(["GET"])
def rebuild_index_old(request):
    index_name = request.GET.get("name", "profils")
    es = Index(index_name, using="default")
    # Drop the stale index so it can be recreated from scratch
    if es.exists():
        es.delete()
    # The "default" connection was already bound in the constructor, so it
    # does not have to be passed to create() again
    es.create()
    return Response({"message": "Rebuild of index {} finished".format(index_name)})
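# A minimal sketch of how this view could be exposed, assuming a standard
# Django REST Framework setup. The URL path and module layout below are
# assumptions for illustration, not taken from the project itself.
#
# urls.py
from django.urls import path

from .views import rebuild_index_old  # hypothetical module location

urlpatterns = [
    path("admin/rebuild-index/", rebuild_index_old),
]

# The index to rebuild is then selected via the "name" query parameter,
# falling back to "profils" when it is omitted:
#   GET /admin/rebuild-index/?name=profils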