Example #1
    def _get_vae_estimator(self, config, reporter, ctx):
        """Take a model configuration - specified by a config file or as determined by model selection and 
        return a VAE topic model ready for training.

        Parameters:
            config (dict): an autogluon configuration/argument object, instantiated to particular parameters
            reporter (`autogluon.core.scheduler.reporter.Reporter`): object for reporting model evaluations to scheduler
            ctx (`mxnet.context.Context`): Mxnet compute context
        
        Returns:
            Estimator (:class:`tmnt.estimator.BaseEstimator`): Either BowEstimator or MetaBowEstimator
        """

        # Optimization and architecture hyperparameters from the configuration
        lr = config.lr
        optimizer = config.optimizer
        n_latent = int(config.n_latent)
        enc_hidden_dim = int(config.enc_hidden_dim)
        coherence_reg_penalty = float(config.coherence_loss_wt)
        redundancy_reg_penalty = float(config.redundancy_loss_wt)
        batch_size = int(config.batch_size)

        # Embedding and encoder settings
        embedding_source = config.embedding.source
        fixed_embedding = config.embedding.get('fixed', False)
        covar_net_layers = config.covar_net_layers
        n_encoding_layers = config.num_enc_layers
        enc_dr = config.enc_dr
        epochs = int(config.epochs)

        # Latent distribution and its distribution-specific hyperparameters:
        # kappa for the von Mises-Fisher ('vmf') distribution, alpha for the logistic Gaussian
        ldist_def = config.latent_distribution
        kappa = 0.0
        alpha = 1.0
        latent_distrib = ldist_def.dist_type
        if latent_distrib == 'vmf':
            kappa = ldist_def.kappa
        elif latent_distrib == 'logistic_gaussian':
            alpha = ldist_def.alpha

        # Build the vocabulary; fall back to the configured embedding size
        # when no pre-trained embedding determines one
        vocab, emb_size = self._initialize_vocabulary(embedding_source)
        if emb_size < 0 and 'size' in config.embedding:
            emb_size = config.embedding.size

        if self.c_args.use_labels_as_covars:
            # Covariate-aware topic model: document labels are treated as covariates
            #n_covars = len(self.label_map) if self.label_map else 1
            n_covars = -1
            model = \
                MetaBowEstimator(vocab, coherence_coefficient=8.0, reporter=reporter,
                                 num_val_words=self.total_tst_words, wd_freqs=self.wd_freqs,
                                 label_map=self.label_map, covar_net_layers=1, ctx=ctx, lr=lr,
                                 latent_distribution=latent_distrib, optimizer=optimizer,
                                 n_latent=n_latent, kappa=kappa, alpha=alpha,
                                 enc_hidden_dim=enc_hidden_dim,
                                 coherence_reg_penalty=coherence_reg_penalty,
                                 redundancy_reg_penalty=redundancy_reg_penalty,
                                 batch_size=batch_size, embedding_source=embedding_source,
                                 embedding_size=emb_size, fixed_embedding=fixed_embedding,
                                 num_enc_layers=n_encoding_layers, enc_dr=enc_dr,
                                 seed_matrix=self.seed_matrix, hybridize=False,
                                 epochs=epochs, log_method='log')
        else:
            # Plain bag-of-words topic model (no covariates)
            print("Encoder coherence = {}".format(self.c_args.encoder_coherence))
            model = \
                BowEstimator(vocab, coherence_coefficient=8.0, reporter=reporter,
                             num_val_words=self.total_tst_words, wd_freqs=self.wd_freqs,
                             ctx=ctx, lr=lr, latent_distribution=latent_distrib,
                             optimizer=optimizer, n_latent=n_latent, kappa=kappa, alpha=alpha,
                             enc_hidden_dim=enc_hidden_dim,
                             coherence_reg_penalty=coherence_reg_penalty,
                             redundancy_reg_penalty=redundancy_reg_penalty,
                             batch_size=batch_size, embedding_source=embedding_source,
                             embedding_size=emb_size, fixed_embedding=fixed_embedding,
                             num_enc_layers=n_encoding_layers, enc_dr=enc_dr,
                             seed_matrix=self.seed_matrix, hybridize=False,
                             epochs=epochs, log_method='log',
                             coherence_via_encoder=self.c_args.encoder_coherence,
                             pretrained_param_file=self.pretrained_param_file)
        model.validate_each_epoch = self.validate_each_epoch
        return model
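A minimal sketch of a configuration object that this method could consume: the field names below only mirror what `_get_vae_estimator` reads above, while the `ns` helper, the particular hyperparameter values, and the `selector` instance named in the trailing comment are illustrative assumptions rather than part of TMNT's or autogluon's API.

class ns(dict):
    """Tiny dict with attribute access, standing in for an autogluon config node (illustrative only)."""
    __getattr__ = dict.__getitem__

config = ns(lr=0.005, optimizer='adam', n_latent=20, enc_hidden_dim=150,
            coherence_loss_wt=0.0, redundancy_loss_wt=0.0, batch_size=200,
            embedding=ns(source='random', fixed=False, size=300),
            covar_net_layers=1, num_enc_layers=1, enc_dr=0.1, epochs=10,
            latent_distribution=ns(dist_type='vmf', kappa=64.0))

# On an instance of the enclosing model-selection class (here called `selector`),
# the estimator would then be obtained along the lines of:
#     estimator = selector._get_vae_estimator(config, reporter=None, ctx=mx.cpu())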
Example #2
def test_train_and_topics_categorical():
    # Fit a label-aware (covariate) BoW model and extract its topic vectors
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    model.get_topic_vectors()
    assert True  # success criterion: the calls above raise no exception
Example #3
def test_train_and_npmi_categorical():
    # Fit and check the per-covariate NPMI coherence computation
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    assert model._npmi_per_covariate(X_scipy, y_numpy, 10) == 0
Example #4
def test_train_and_transform_categorical():
    # Fit, transform the training data, and inspect the encoded document representations
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    trans = model.transform(X_scipy, y_numpy)
    assert np.all(trans == trans[0])
Example #5
def test_train_and_perplexity_categorical():
    # Fit and verify that perplexity can be computed on the training data
    model = MetaBowEstimator(vocabulary, batch_size=32)
    model.fit(X_scipy, y_numpy)
    model.perplexity(X_scipy, y_numpy)
    assert True  # success criterion: the calls above raise no exception
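Examples #2 through #5 all rely on module-level fixtures named `vocabulary`, `X_scipy`, and `y_numpy` that are not shown here. Below is a minimal sketch of how such fixtures could be constructed; the toy corpus, its labels, and the use of gluonnlp's Vocab are assumptions for illustration, not the project's actual test setup.

import numpy as np
import scipy.sparse as sp
import gluonnlp as nlp

# Toy corpus standing in for the real test data
docs = [["topics", "are", "latent", "themes"],
        ["neural", "topic", "models", "use", "latent", "variables"],
        ["labels", "can", "act", "as", "covariates"]]

# Vocabulary over the corpus tokens
counter = nlp.data.count_tokens([tok for doc in docs for tok in doc])
vocabulary = nlp.Vocab(counter)

# Bag-of-words document-term matrix (documents x vocabulary size)
dense = np.zeros((len(docs), len(vocabulary)), dtype='float32')
for i, doc in enumerate(docs):
    for tok in doc:
        dense[i, vocabulary[tok]] += 1.0
X_scipy = sp.csr_matrix(dense)

# One categorical label per document
y_numpy = np.array([0, 1, 0])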