def test_runner_specify_basic_kernel():
    """Smoke test: a runner given an explicit ``["crf"]`` kernel list runs once."""
    n_docs, vocab_size = 10, 20
    definition = model_definition(n_docs, vocab_size)
    # The toy dataset is handed to the model and runner directly as the view.
    dataset = toy_dataset(definition)
    generator = rng()
    state = model.initialize(definition, dataset, generator)
    crf_runner = runner.runner(definition, dataset, state, ["crf"])
    crf_runner.run(generator, 1)
def test_runner_simple():
    """Smoke test: a runner built without an explicit kernel list runs once."""
    # NOTE(review): another function named test_runner_simple appears further
    # down in this view; if both live in the same module the later definition
    # shadows this one and this test is never collected -- confirm.
    n_docs, vocab_size = 10, 20
    definition = model_definition(n_docs, vocab_size)
    # The toy dataset is handed to the model and runner directly as the view.
    dataset = toy_dataset(definition)
    generator = rng()
    state = model.initialize(definition, dataset, generator)
    default_runner = runner.runner(definition, dataset, state)
    default_runner.run(generator, 1)
def test_lda_zero_iter(self):
    """Compare ``self.latent`` against a state that has run zero iterations.

    A new state is initialized from a separate seeded generator and wrapped
    in a runner that is never run. Its perplexity must be strictly greater
    than that of ``self.latent``.
    """
    fresh_rng = rng(seed=54321)
    fresh_state = model.initialize(self.defn, self.docs, fresh_rng)
    assert fresh_state is not None

    # The runner is only constructed, never run: this is the 0-iteration case.
    idle_runner = runner.runner(self.defn, self.docs, fresh_state)
    assert idle_runner is not None

    doc_topics = fresh_state.topic_distribution_by_document()
    assert doc_topics is not None

    unfit_perplexity = fresh_state.perplexity()
    reference_perplexity = self.latent.perplexity()
    assert unfit_perplexity > reference_perplexity
def test_runner_simple():
    """Smoke test: run one iteration using the default kernel config on a
    ``numpy_dataview`` of the toy dataset."""
    # NOTE(review): an earlier function in this view is also named
    # test_runner_simple; if both live in the same module this definition
    # shadows the earlier one -- confirm.
    n_docs, vocab_size = 10, 100
    definition = model_definition(n_docs, vocab_size)
    dataview = numpy_dataview(toy_dataset(definition))
    generator = rng()
    state = model.initialize(definition, dataview, generator)
    kernel_config = runner.default_kernel_config(definition)
    configured_runner = runner.runner(definition, dataview, state, kernel_config)
    configured_runner.run(generator, 1)
def test_runner_specify_hp_kernels():
    """Smoke test: run one iteration with 'crf' plus both hyperparameter
    kernel configs (``second_dp_hp_kernel_config`` followed by
    ``base_dp_hp_kernel_config``)."""
    n_docs, vocab_size = 10, 20
    definition = model_definition(n_docs, vocab_size)
    # The toy dataset is handed to the model and runner directly as the view.
    dataset = toy_dataset(definition)
    generator = rng()
    state = model.initialize(definition, dataset, generator)
    kernel_config = (
        ['crf']
        + runner.second_dp_hp_kernel_config(definition)
        + runner.base_dp_hp_kernel_config(definition)
    )
    hp_runner = runner.runner(definition, dataset, state, kernel_config)
    hp_runner.run(generator, 1)
def setup_class(cls):
    """Fit one model for the whole test class and cache the results on ``cls``.

    Sets ``niters``, ``seed``, ``defn``, ``prng``, ``latent``, ``r``,
    ``original_perplexity`` (measured before the run) and ``doc_topic``
    (measured after the run).
    """
    # presumably populates cls.docs, cls.N and cls.V, which are read below
    # -- verify against _load_docs
    cls._load_docs()

    # Use the longer run only when the TRAVIS environment variable is set.
    if os.environ.get('TRAVIS'):
        cls.niters = 100
    else:
        cls.niters = 2

    cls.seed = 12345
    cls.defn = model_definition(cls.N, cls.V)
    cls.prng = rng(seed=cls.seed)

    # The same generator drives both initialization and the run, so the
    # order of the following calls must not change.
    cls.latent = model.initialize(cls.defn, cls.docs, cls.prng)
    cls.r = runner.runner(cls.defn, cls.docs, cls.latent)

    # Record the perplexity before any iterations are run.
    cls.original_perplexity = cls.latent.perplexity()

    cls.r.run(cls.prng, cls.niters)
    cls.doc_topic = cls.latent.topic_distribution_by_document()
def test_lda_random_seed(self):
    """Check that all randomness is contained in the rng object.

    The model is initialized and run twice, each time from a fresh generator
    with the same seed. Both runs must produce identical document-topic
    distributions and identical per-topic word distributions.
    """
    niters = 10
    seed = 54321

    def fit_from_seed():
        # Build a fresh generator, initialize a state from it and run the
        # sampler; the same generator is used for both steps.
        prng = rng(seed=seed)
        latent = model.initialize(self.defn, self.docs, prng)
        runner.runner(self.defn, self.docs, latent).run(prng, niters)
        return latent

    latent1 = fit_from_seed()
    latent2 = fit_from_seed()

    assert_list_equal(latent1.topic_distribution_by_document(),
                      latent2.topic_distribution_by_document())

    words1 = list(latent1.word_distribution_by_topic())
    words2 = list(latent2.word_distribution_by_topic())
    # zip() stops at the shorter input, so without this check a run with
    # extra topics would go unnoticed by the pairwise comparison below.
    assert len(words1) == len(words2)
    for d1, d2 in zip(words1, words2):
        assert_dict_equal(d1, d2)
def test_runner_second_dp_valid():
    """Run 'crf' plus the ``second_dp_hp_kernel_config`` kernels for 10
    iterations and check which hyperparameters are affected.

    ``latent.beta`` and ``latent.gamma`` must remain (almost) equal to their
    values from before the run, and ``latent.alpha`` must be positive.
    """
    n_docs, vocab_size = 10, 20
    definition = model_definition(n_docs, vocab_size)
    dataset = toy_dataset(definition)
    generator = rng()
    latent = model.initialize(definition, dataset, generator)

    # Snapshot the values that are expected to stay the same across the run.
    beta_before, gamma_before = latent.beta, latent.gamma

    kernel_config = ['crf'] + runner.second_dp_hp_kernel_config(definition)
    dp_runner = runner.runner(definition, dataset, latent, kernel_config)
    dp_runner.run(generator, 10)

    assert_almost_equals(latent.beta, beta_before)
    assert_almost_equals(latent.gamma, gamma_before)
    assert latent.alpha > 0