示例#1
0
    def test_creation_from_file(self):
        """Check that a corpus loaded from disk equals one built in memory.

        Every document in ``self.list_of_documents`` is written as one line of
        space-separated words to a file inside ``self.tempdir``.  The corpus
        read back from that file is then compared with the corpus built
        directly from the same word lists.
        """
        document_path = os.path.join(self.tempdir.name, "file")
        with open(document_path, "w") as handle:
            # One document per line, words separated by single spaces.
            handle.writelines(
                " ".join(words) + "\n" for words in self.list_of_documents
            )

        loaded = Corpus.from_document_file(document_path)
        built = Corpus.from_iterable_of_word_lists(self.list_of_documents)
        self.assertEqual(built, loaded, "Loaded corpus is not correct.")
示例#2
0
 def setUp(self):
     """Build the shared document fixture and its corpus before every test.

     The fixture holds three short documents; the last one repeats the word
     "lazy".  ``self.corpus`` is created from those word lists.
     """
     first_document = ["the", "quick", "brown", "fox"]
     second_document = ["jumped", "over"]
     third_document = ["the", "lazy", "lazy", "dog"]

     self.list_of_documents = [first_document, second_document, third_document]
     self.corpus = Corpus.from_iterable_of_word_lists(self.list_of_documents)
示例#3
0
    def setUp(self):
        """Prepare a seeded model and a temporary directory before each test.

        The model is built on the sample data file with 20 topics, its topic
        assignment is randomly initialised with seed 1, and ``inference(5)``
        is run on it.  A fresh ``tempfile.TemporaryDirectory`` is stored on
        ``self.tempdir``.
        """
        sample_corpus = Corpus.from_document_file("tests/data/sample_data")
        self.model = dmm = GibbsSamplingDMM(sample_corpus, number_of_topics=20)
        dmm.randomly_initialise_topic_assignment(seed=1)
        dmm.inference(5)

        scratch_directory = tempfile.TemporaryDirectory()
        self.tempdir = scratch_directory
示例#4
0
 def setUp(self):
     """Prepare a seeded model and synthetic documents before each test.

     The model is built on the sample data file with 20 topics, its topic
     assignment is randomly initialised with seed 1, and ``inference(100)``
     is run on it.  The two values returned by
     ``generate_synthetic_documents(10, seed=5)`` are stored as
     ``self.generated_documents`` and ``self.chosen_topics``.
     """
     sample_corpus = Corpus.from_document_file("tests/data/sample_data")
     self.model = dmm = GibbsSamplingDMM(sample_corpus, number_of_topics=20)
     dmm.randomly_initialise_topic_assignment(seed=1)
     dmm.inference(100)

     synthetic = dmm.generate_synthetic_documents(10, seed=5)
     self.generated_documents, self.chosen_topics = synthetic
示例#5
0
    def test_speed_of_inference(self):
        """Check that inference stays within the per-iteration time budget.

        Runs 200 inference iterations on the sample data with a seeded topic
        assignment, then compares the mean duration of one iteration with
        ``expected_seconds_per_iteration`` using a tolerance of ``delta``.
        """
        corpus = Corpus.from_document_file("tests/data/sample_data")

        model = GibbsSamplingDMM(corpus, number_of_topics=20)
        model.randomly_initialise_topic_assignment(seed=1)
        number_of_iterations = 200

        # perf_counter() is monotonic and high resolution, so the measurement
        # cannot be skewed by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        model.inference(number_of_iterations)
        elapsed = time.perf_counter() - start

        average_seconds_per_iteration = elapsed / number_of_iterations
        expected_seconds_per_iteration = 0.02
        delta = 0.05

        # NOTE(review): delta is larger than the expected value, so the lower
        # bound below is negative and this "faster" branch cannot trigger with
        # the current numbers; it only becomes active if delta is tightened
        # below expected_seconds_per_iteration.
        if average_seconds_per_iteration < expected_seconds_per_iteration - delta:
            self.fail(
                "Code was faster: actually took {:.5f} per iteration.".format(
                    average_seconds_per_iteration))
        else:
            # Reuse delta so the tolerance is defined in exactly one place.
            self.assertAlmostEqual(average_seconds_per_iteration,
                                   expected_seconds_per_iteration,
                                   delta=delta)
示例#6
0
 def test_bad_equality(self):
     """Check that comparing a Corpus with an integer raises TypeError."""
     word_list_corpus = Corpus.from_iterable_of_word_lists(self.list_of_documents)
     failure_message = "Comparison to incorrect type should raise an error."
     with self.assertRaises(TypeError, msg=failure_message):
         # bool() forces the comparison result to be evaluated.
         bool(word_list_corpus == 5)
示例#7
0
 def setUp(self):
     """Prepare a seeded model on the sample data before each test.

     The model is built with 20 topics and its topic assignment is randomly
     initialised with seed 1.
     """
     sample_corpus = Corpus.from_document_file("tests/data/sample_data")
     self.model = dmm = GibbsSamplingDMM(sample_corpus, number_of_topics=20)
     dmm.randomly_initialise_topic_assignment(seed=1)