    def update_target_distribution(self, valid_loader, tol):
        data = []
        labels = []
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        
        for batch_idx, (inputs, tar, _) in enumerate(valid_loader):
            if use_cuda:
                inputs = inputs.cuda()
            _, tmp_q, _ = self.forward(inputs)
            data.append(tmp_q.data)
            labels.append(tar.cpu().numpy())
        tmp_q = torch.cat(data)
        labels = np.concatenate(labels)
        self.prop = self.target_distribution(tmp_q)

        #evaluate clustering performance
        y_pred = tmp_q.cpu().numpy().argmax(1)

        labels_changed = np.sum(y_pred != self.y_pred_last).astype(
            np.float32) / y_pred.shape[0]
        self.y_pred_last = y_pred

        if labels_changed < tol:
            self.convergence_iter += 1
        else:
            self.convergence_iter = 0
        return labels_changed, cluster_acc(labels, y_pred)[0]
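The method above calls a `self.target_distribution` helper that is not shown in this snippet. A minimal sketch of the standard DEC auxiliary target distribution it presumably computes, p_ij = q_ij^2 / f_j with each row renormalised to sum to 1 (f_j being the soft cluster frequency), could look like this:

import torch

def target_distribution(q):
    # q: (n_samples, n_clusters) soft assignments; q.sum(dim=0) gives the soft cluster frequencies
    weight = q ** 2 / q.sum(dim=0)
    # renormalise each row so that the target probabilities sum to 1 per sample
    return (weight.t() / weight.sum(dim=1)).t()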
Example #2
    def __initialize_models(self, feat, labels=None):
        self.data_size = feat.shape[0]
        self.feat_dim = feat.shape[1]
        if self.verbose:
            print('Pretraining Cluster Centers by KMeans')
        self.kmeans = KMeans(n_clusters=self.n_clusters,
                             n_init=20,
                             # n_jobs was deprecated and later removed in scikit-learn >= 1.0
                             n_jobs=self.max_jobs,
                             verbose=False)
        self.last_pred = self.kmeans.fit_predict(feat)

        if labels is not None:
            tmp_acc = cluster_acc(labels, self.last_pred)
            if self.verbose:
                print('KMeans acc is {}'.format(tmp_acc))

        if self.verbose:
            print('Building Cluster Layer')
        # self.cluster_layer = ClusterNet(torch.Tensor(self.kmeans.cluster_centers_.astype(np.float32)))
        self.cluster_layer = ClusterNet(torch.from_numpy(self.kmeans.cluster_centers_.astype(np.float32)))
        if self.use_cuda:
            self.cluster_layer.cuda()
        if self.verbose:
            print('Building Optimizer')
        self.optimizer = optim.Adam(self.cluster_layer.parameters(), lr=self.lr)
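The `ClusterNet` layer built above from the k-means centres is not defined in this snippet. A plausible sketch, assuming the usual DEC soft-assignment layer with a Student's t kernel (alpha = 1 is an assumption here), is:

import torch
import torch.nn as nn

class ClusterNet(nn.Module):
    def __init__(self, init_centers, alpha=1.0):
        super().__init__()
        self.alpha = alpha
        # (n_clusters, feat_dim) trainable cluster centres, initialised from k-means
        self.centers = nn.Parameter(init_centers)

    def forward(self, x):
        # q_ij proportional to (1 + ||x_i - mu_j||^2 / alpha)^(-(alpha + 1) / 2), rows normalised to 1
        dist_sq = torch.sum((x.unsqueeze(1) - self.centers) ** 2, dim=2)
        q = (1.0 + dist_sq / self.alpha) ** (-(self.alpha + 1.0) / 2.0)
        return q / q.sum(dim=1, keepdim=True)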
Example #3

def clustering_creation(df_final, target_column, dataset):

    X = df_final.loc[:, df_final.columns != target_column]
    Y = df_final.loc[:, df_final.columns == target_column]

    clusters = np.linspace(2, len(X.columns), 3, dtype=np.int64, endpoint=True)
    SSE = defaultdict(dict)
    ll = defaultdict(lambda: defaultdict(dict))
    acc = defaultdict(lambda: defaultdict(dict))
    adjMI = defaultdict(lambda: defaultdict(dict))
    SS = defaultdict(lambda: defaultdict(dict))
    SSS = defaultdict(lambda: defaultdict(dict))
    km = kmeans(random_state=5)
    gmm = GMM(random_state=5)

    for k in clusters:
        km.set_params(n_clusters=k)
        gmm.set_params(n_components=k)
        km.fit(X)
        gmm.fit(X)

        # Predict once per model and reuse the labels for every metric below
        cluster_labels_km = km.predict(X)
        cluster_labels_gm = gmm.predict(X)

        SSE[k][dataset] = km.score(X)
        ll[k][dataset]['AIC'] = gmm.aic(X)
        ll[k][dataset]['BIC'] = gmm.bic(X)
        SS[k][dataset]['Kmeans'] = cluster_silhouette_score(X, cluster_labels_km)
        SS[k][dataset]['GMM'] = cluster_silhouette_score(X, cluster_labels_gm)
        SSS[k][dataset]['Kmeans'] = cluster_sample_silhouette_score(
            X, cluster_labels_km)
        SSS[k][dataset]['GMM'] = cluster_sample_silhouette_score(
            X, cluster_labels_gm)
        acc[k][dataset]['Kmeans'] = cluster_acc(Y, cluster_labels_km)
        acc[k][dataset]['GMM'] = cluster_acc(Y, cluster_labels_gm)
        adjMI[k][dataset]['Kmeans'] = ami(Y.squeeze(1), cluster_labels_km)
        adjMI[k][dataset]['GMM'] = ami(Y.squeeze(1), cluster_labels_gm)
        print(k)

        plot_silhouette_score(X, SS, SSS, k, dataset, cluster_labels_km,
                              cluster_labels_gm)
    plot_cluster_accuracy(dataset, acc, clusters)
    plot_cluster_information(dataset, adjMI, clusters)
    KMeans_ELBOW(dataset, SSE, clusters)
    BICandAIC(dataset, ll, clusters)
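The `cluster_silhouette_score` and `cluster_sample_silhouette_score` helpers used above are not defined in this snippet; assuming they are thin wrappers over scikit-learn, they might simply be:

from sklearn.metrics import silhouette_score, silhouette_samples

def cluster_silhouette_score(X, labels):
    # mean silhouette coefficient over all samples
    return silhouette_score(X, labels)

def cluster_sample_silhouette_score(X, labels):
    # per-sample silhouette values, used for the silhouette plots
    return silhouette_samples(X, labels)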
Example #4
    def fit(self, feat, labels=None):
        self.__initialize_models(feat, labels=labels)
        self.__update_target_distribute(feat)

        if self.verbose:
            print('Begin to Iterate')
        index = 0
        for ite in range(int(self.maxiter)):
            if ite % self.update_interval == (self.update_interval - 1):
                self.__update_target_distribute(feat)
                tmp_pred_cur = self.__get_label_pred(self.current_q)
                acc = None
                if labels is not None:
                    acc = cluster_acc(labels, tmp_pred_cur)
                    if self.logger is not None:
                        self.logger.record_acc(acc, ite)
                if self.verbose:
                    if acc is not None:
                        print('Iter {} Acc {}'.format(ite,acc))
                    else:
                        print('Update Target Distribution in Iter {}'.format(ite))

                if ite > 0 and self.__whether_convergence(tmp_pred_cur, self.last_pred):
                    break
                self.last_pred = tmp_pred_cur

            if index + self.batch_size > self.data_size:
                feat_batch = feat[index:]
                p_batch = self.current_p[index:]
                index = 0
            else:
                feat_batch = feat[index: index + self.batch_size]
                p_batch = self.current_p[index: index + self.batch_size]
                index += self.batch_size  # advance to the next mini-batch
            feat_batch = Variable(torch.from_numpy(feat_batch.astype(np.float32)))
            p_batch = Variable(torch.from_numpy(p_batch.astype(np.float32)))
            if self.use_cuda:
                feat_batch = feat_batch.cuda()
                p_batch = p_batch.cuda()

            self.cluster_layer.zero_grad()
            q_batch = self.cluster_layer(feat_batch)
            cluster_loss = F.binary_cross_entropy(q_batch, p_batch)
            if self.logger is not None:
                self.logger.record_loss(cluster_loss.data[0], ite)
            cluster_loss.backward()
            self.optimizer.step()
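`self.__whether_convergence` is not shown in Example #4. A minimal sketch, assuming it stops training once the fraction of samples whose cluster label changed falls below a tolerance (the `tol` default here is an assumption), is:

import numpy as np

def whether_convergence(pred_cur, pred_last, tol=1e-3):
    # fraction of points whose cluster assignment changed since the last update
    delta_label = np.sum(np.asarray(pred_cur) != np.asarray(pred_last)) / float(len(pred_cur))
    return delta_label < tol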
Example #5
    def update_cluster_acc(self):
        from sklearn.metrics import normalized_mutual_info_score
        from sklearn.metrics import adjusted_rand_score
        true_labels = np.array(self.corpus_loader.train_labels)
        self.current_cluster_acc = cluster_acc(true_labels, self.current_pred_labels)
        self.current_cluster_nmi = normalized_mutual_info_score(true_labels, self.current_pred_labels)
        # adjusted Rand index, to match the attribute name
        self.current_cluster_ari = adjusted_rand_score(true_labels, self.current_pred_labels)
Example #6
def run_eval(config):
    """Runs the evaluation of a deep generative model.

    Args:
        config: A configuration object with config values accessible as properties.
    """

    # Set the random seed for shuffling and sampling
    tf.random.set_seed(config.random_seed)

    gpus = tf.config.experimental.list_physical_devices('GPU')
    # Extract integer GPU IDs
    gpu_ids = list(map(int, config.gpu_num.split(',')))
    # Set the accessible GPUs for training
    try:
        for i in gpu_ids:
            tf.config.experimental.set_memory_growth(gpus[i], True)
            tf.config.experimental.set_visible_devices(gpus[i], 'GPU')

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

    if config.dataset == 'moving_mnist':
        logging.info("Loading the Moving MNIST dataset...")
        dataset, train_mu = create_moving_mnist(config,
                                                split=config.split,
                                                shuffle=False)
        # Convert training set mean to logit space for bias initialisation of generative model
        gen_bias_init = -tf.math.log(
            1. / tf.clip_by_value(train_mu, 0.0001, 0.9999) - 1)
        data_type = 'binary'
    elif config.dataset == 'sprites':
        logging.info("Loading the Sprites dataset...")
        _, dataset, _ = create_lpc_sprites(config, shuffle=False)
        train_mu = tf.zeros(
            [config.patch_size, config.patch_size, config.num_channels],
            dtype=np.float32)
        gen_bias_init = 0.0
        data_type = 'real'

    logging.info("Constructing the unsupervised generative model...")
    model = create_model(config, gen_bias_init, data_type)

    # Set up log directory for loading the pre-trained model
    logdir = '{}/{}/train/{}/h{}_r{}_f{}_z{}/run{}'.format(
        config.logdir, config.dataset, config.model, config.hidden_size,
        config.rnn_size, config.latent_size, config.dynamic_latent_size,
        config.random_seed)
    if not tf.io.gfile.exists(logdir):
        logging.error("No directory {}".format(logdir))
        sys.exit(1)

    # Checkpoint management
    ckpt = tf.train.Checkpoint(model=model,
                               epoch=tf.Variable(0,
                                                 trainable=False,
                                                 dtype=tf.int64))
    manager = tf.train.CheckpointManager(ckpt, directory=logdir, max_to_keep=5)
    ckpt.restore(manager.latest_checkpoint).expect_partial()

    if manager.latest_checkpoint:
        logging.info("Successfully restored from {}".format(
            manager.latest_checkpoint))
        step = int(ckpt.epoch)
        logging.info("At epoch: {}".format(step))
    else:
        logging.error("Failed to restore the model checkpoint.")
        sys.exit(1)

    # Summary directory for evaluation results
    summary_dir = '{}/{}/{}/{}/h{}_r{}_f{}_z{}/run{}'.format(
        config.logdir, config.dataset, config.split, config.model,
        config.hidden_size, config.rnn_size, config.latent_size,
        config.dynamic_latent_size, config.random_seed)
    qualitative_dir = summary_dir + '/qualitative_results'
    if not tf.io.gfile.exists(qualitative_dir):
        tf.io.gfile.makedirs(qualitative_dir)

    # Create summary writer
    summary_writer = tf.summary.create_file_writer(summary_dir)
    summary_writer.set_as_default()

    # Boolean flags to switch between models
    is_clustering = (config.model == 'discvae') or (config.model == 'gmvae')
    logging.info("Clustering? {}".format(is_clustering))
    is_predictive = (config.model == 'discvae') or (config.model == 'vrnn')
    logging.info("Predictive? {}".format(is_predictive))

    # Evaluation metrics
    elbo = tf.keras.metrics.Mean(name='elbo', dtype=tf.float32)
    bce_loss = torch.nn.BCELoss()
    mse_loss = torch.nn.MSELoss()
    bce_results = []
    mse_results = []

    latents = []
    predictions = []
    labels = []
    # Loop over dataset for a single epoch
    for imgs, tgts, lens, labs_all in dataset:
        # Compute bound estimates
        if config.model == 'discvae':
            elbo_per_batch, infer_c, latent_per_batch, _, _ = model.run_model(
                imgs, tgts, lens, num_samples=config.num_samples)
            predictions.extend(infer_c)
            # Sample from the inferred clusters of the GMM
            prior_f, _ = model.sample_static_prior(
                infer_c, num_samples=config.num_samples)
        elif config.model == 'vrnn':
            elbo_per_batch, latent_per_batch, _, _ = model.run_model(
                imgs, tgts, lens, num_samples=config.num_samples)
        elif config.model == 'gmvae':
            elbo_per_batch, infer_c, _, _ = model.run_model(
                tgts, num_samples=config.num_samples)
            predictions.extend(infer_c)
            latent_per_batch, _ = model.sample_prior(
                infer_c, num_samples=config.num_samples)
        else:
            elbo_per_batch, latent_per_batch, _ = model.run_model(
                tgts, num_samples=config.num_samples)

        # Mean integration of MC samples
        latent = tf.reduce_mean(latent_per_batch, axis=0)
        latents.extend(latent)

        # Extend labels for all models
        labels.extend(labs_all)

        # Update elbo metric
        elbo.update_state(elbo_per_batch)

        # Future prediction evaluation if relevant
        if is_predictive:
            input_prefixes = imgs[:config.prefix_length]
            target_prefixes = tgts[:config.prefix_length]
            prefix_lengths = tf.ones_like(lens) * config.prefix_length

            sample_inputs = imgs[config.prefix_length]
            # Run model on prefix input sequences and then conditionally sample forward in time
            if config.model == 'discvae':
                _, _, prefix_f, final_state, _ = model.run_model(
                    input_prefixes,
                    target_prefixes,
                    prefix_lengths,
                    num_samples=config.num_samples)
                # (sample_length, num_samples, batch_size, patch_size, patch_size, num_channels)
                forecasts = model.sample_model(
                    sample_inputs,
                    final_state,
                    inject_f=prefix_f,
                    sample_length=config.sample_length,
                    train_mu=train_mu)
            elif config.model == 'vrnn':
                _, _, final_state, _ = model.run_model(
                    input_prefixes,
                    target_prefixes,
                    prefix_lengths,
                    num_samples=config.num_samples)
                # (sample_length, num_samples, batch_size, patch_size, patch_size, num_channels)
                forecasts = model.sample_model(
                    sample_inputs,
                    final_state,
                    sample_length=config.sample_length,
                    train_mu=train_mu)

            # (sample_length, batch_size, patch_size, patch_size, num_channels)
            forecasts = tf.reduce_mean(forecasts, axis=1)
            ground_truth = tgts[config.prefix_length:config.prefix_length +
                                config.sample_length]

            forecasts_torch = torch.from_numpy(np.array(forecasts))
            ground_truth_torch = torch.from_numpy(np.array(ground_truth))

            mse_score = mse_loss(forecasts_torch, ground_truth_torch)
            # Clamp forecasts away from exact 0/1 so the BCE below stays finite
            eps = 1e-4
            forecasts_torch[forecasts_torch < eps] = eps
            forecasts_torch[forecasts_torch > 1 - eps] = 1 - eps
            bce_score = bce_loss(forecasts_torch, ground_truth_torch)
            # Rescale the element-wise mean losses by the frame size
            bce_score = (bce_score.item() * config.patch_size *
                         config.patch_size * config.num_channels)
            mse_score = (mse_score.item() * config.patch_size *
                         config.patch_size * config.num_channels)

            bce_results.append(bce_score)
            mse_results.append(mse_score)

    latents_np = np.array(latents)
    labels_np = np.array(labels)
    primary_labels = labels_np[:, 0]

    logging.info("Plotting latent code for inferred latent variable...")
    latent_two = utils.reduce_dimensionality(latents_np)
    if config.dataset == 'moving_mnist':
        utils.tsne_visualise(qualitative_dir,
                             step,
                             latent_two,
                             primary_labels,
                             num_colours=10)
    else:
        utils.tsne_visualise(qualitative_dir,
                             step,
                             latent_two,
                             primary_labels,
                             num_colours=9)

    # Save summaries following evaluation over 'split' set
    tf.summary.scalar(config.split + '/elbo', elbo.result(), step=ckpt.epoch)
    # If a clustering model then report on the metric
    if is_clustering:
        predictions_np = np.array(predictions)
        test_acc = utils.cluster_acc(predictions_np, primary_labels)
        tf.summary.scalar(config.split + '/acc',
                          test_acc * 100,
                          step=ckpt.epoch)
        test_nmi = utils.compute_NMI(predictions_np, primary_labels)
        tf.summary.scalar(config.split + '/nmi', test_nmi, step=ckpt.epoch)

    # If a predictive model then report on relevant metrics
    if is_predictive:
        tf.summary.scalar(config.split + '/bce',
                          np.mean(bce_results),
                          step=ckpt.epoch)
        tf.summary.scalar(config.split + '/mse',
                          np.mean(mse_results),
                          step=ckpt.epoch)

    # Perform full qualitative analysis only if the model is DiSCVAE
    if config.model == 'discvae':
        logging.info(
            "Plotting density estimates of component samples from model prior..."
        )
        component_f, learnt_prior = model.sample_static_prior(num_samples=250)
        flattened_component_f = tf.reshape(component_f,
                                           [-1, config.latent_size])
        component_f_two = utils.reduce_dimensionality(flattened_component_f)
        utils.plot_density(qualitative_dir, step, component_f_two)

        # Create plots of sampled states from fixed 'f' samples
        for imgs, tgts, lens, _ in dataset.take(1):
            # Take random batch example but maintain batch dimension
            rand_batch = np.random.randint(config.batch_size)
            inputs = tf.expand_dims(imgs[:, rand_batch], axis=1)
            targets = tf.expand_dims(tgts[:, rand_batch], axis=1)

            # Run model through this single batched prefix sequence
            input_prefixes = inputs[:config.prefix_length]
            target_prefixes = targets[:config.prefix_length]
            _, infer_c, prefix_f, final_state, _ = model.run_model(
                input_prefixes, target_prefixes, [config.prefix_length])

            # Sample forward from model to obtain conditionally generated predictions
            sample_inputs = inputs[config.prefix_length]
            ground_truth = targets[config.prefix_length:config.prefix_length +
                                   config.sample_length]

            # (sample_length, 1, batch_size, patch_size, patch_size, num_channels)
            forecast_samples = model.sample_model(
                sample_inputs,
                final_state,
                inject_f=prefix_f,
                sample_length=config.sample_length,
                train_mu=train_mu)

            # Extract mean of 'num_samples' from each cluster (1, K, latent_size)
            inject_f = tf.reduce_mean(component_f[:config.num_samples],
                                      axis=0,
                                      keepdims=True)
            # Reshape to have single batch size (1, 1, K, latent_size)
            inject_f = tf.expand_dims(inject_f, axis=1)

            # Sampled states from each cluster
            inject_samples = [None] * config.mixture_components
            for k in range(config.mixture_components):
                inject_samples[k] = model.sample_model(
                    sample_inputs,
                    final_state,
                    inject_f=inject_f[:, :, k],
                    sample_length=config.sample_length,
                    train_mu=train_mu).numpy()

            logging.info(
                "Plotting sampled sequence ground truth and forecasts...")
            utils.plot_video_sequence(qualitative_dir,
                                      step,
                                      target_prefixes.numpy(),
                                      name='prefixes')
            utils.plot_video_sequence(qualitative_dir,
                                      step,
                                      ground_truth.numpy(),
                                      name='ground_truth')
            utils.plot_video_sequence(qualitative_dir,
                                      step,
                                      forecast_samples.numpy(),
                                      name='forecasts')

            logging.info(
                "Plotting forecasted states of fixed samples from each cluster..."
            )
            utils.plot_k_samples(qualitative_dir,
                                 step,
                                 ground_truth.numpy(),
                                 inject_samples,
                                 infer_c[0].numpy(),
                                 num_k_display=config.mixture_components)

            # Feature swapping and fixing for reconstruction only
            recons = model.reconstruct(imgs,
                                       tgts,
                                       lens,
                                       num_samples=config.num_samples)
            swapped_f = model.reconstruct(imgs,
                                          tgts,
                                          lens,
                                          swap_f=True,
                                          num_samples=config.num_samples)
            swapped_z = model.reconstruct(imgs,
                                          tgts,
                                          lens,
                                          swap_z=True,
                                          num_samples=config.num_samples)

            logging.info("Plotting reconstructions and swapped features...")
            utils.plot_batch_sequence(qualitative_dir,
                                      step,
                                      tgts.numpy(),
                                      name='original')
            utils.plot_batch_sequence(qualitative_dir,
                                      step,
                                      recons.numpy(),
                                      name='reconstructions')
            utils.plot_batch_sequence(qualitative_dir,
                                      step,
                                      swapped_f.numpy(),
                                      name='swapped_f')
            utils.plot_batch_sequence(qualitative_dir,
                                      step,
                                      swapped_z.numpy(),
                                      name='swapped_z')

    # Force flush the summary writer after testing
    summary_writer.flush()
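`utils.reduce_dimensionality` belongs to the project's own utils module and is not shown here. As a rough, hypothetical sketch of that step (the real helper may use a different method or parameters), a 2-D t-SNE projection would look like this:

from sklearn.manifold import TSNE

def reduce_dimensionality(latents, perplexity=30, random_state=0):
    # project latent codes to 2-D for visualisation
    return TSNE(n_components=2, perplexity=perplexity,
                random_state=random_state).fit_transform(latents)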
Example #7
def run_train(config):
    """Runs the training of a deep generative model.

    Args:
        config: A configuration object with config values accessible as properties.
    """

    # Set the random seed for shuffling and sampling
    tf.random.set_seed(config.random_seed)

    gpus = tf.config.experimental.list_physical_devices('GPU')
    # Extract integer GPU IDs
    gpu_ids = list(map(int, config.gpu_num.split(',')))
    # Set the accessible GPUs for training
    try:
        for i in gpu_ids:
            tf.config.experimental.set_memory_growth(gpus[i], True)
            tf.config.experimental.set_visible_devices(gpus[i], 'GPU')

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

    if config.dataset == 'moving_mnist':
        logging.info("Loading the Moving MNIST dataset...")
        train_ds, train_mu = create_moving_mnist(config,
                                                 split='train',
                                                 shuffle=True)
        test_ds, _ = create_moving_mnist(config,
                                         split=config.split,
                                         shuffle=False)
        # Convert training set mean to logit space for bias initialisation of generative model
        gen_bias_init = -tf.math.log(
            1. / tf.clip_by_value(train_mu, 0.0001, 0.9999) - 1)
        data_type = 'binary'
    elif config.dataset == 'sprites':
        logging.info("Loading the Sprites dataset...")
        train_ds, test_ds, _ = create_lpc_sprites(config, shuffle=True)
        gen_bias_init = 0.0
        data_type = 'real'

    logging.info("Constructing the unsupervised generative model...")
    model = create_model(config, gen_bias_init, data_type)

    # Set up the optimiser
    opt = tf.keras.optimizers.Adam(config.learning_rate,
                                   clipnorm=config.clip_norm)

    # Set up log directory for saving checkpoints
    logdir = '{}/{}/train/{}/h{}_r{}_f{}_z{}/run{}'.format(
        config.logdir, config.dataset, config.model, config.hidden_size,
        config.rnn_size, config.latent_size, config.dynamic_latent_size,
        config.random_seed)
    if not tf.io.gfile.exists(logdir):
        logging.info("Creating log directory at {}".format(logdir))
        tf.io.gfile.makedirs(logdir)

    # Checkpoint management
    ckpt = tf.train.Checkpoint(model=model,
                               epoch=tf.Variable(0,
                                                 trainable=False,
                                                 dtype=tf.int64),
                               step=tf.Variable(0,
                                                trainable=False,
                                                dtype=tf.int64),
                               optimizer=opt)
    manager = tf.train.CheckpointManager(ckpt, directory=logdir, max_to_keep=5)
    ckpt.restore(manager.latest_checkpoint)

    if manager.latest_checkpoint:
        logging.info("Restored from {}".format(manager.latest_checkpoint))
    else:
        logging.info("Initialising from scratch...")

    # Create summary writer
    summary_writer = tf.summary.create_file_writer(logdir + '/summaries')
    summary_writer.set_as_default()

    # Boolean flags to switch between models
    is_clustering = (config.model == 'discvae') or (config.model == 'gmvae')
    logging.info("Clustering? {}".format(is_clustering))

    # Training aggregate metrics
    train_elbo = tf.keras.metrics.Mean(name='train_elbo', dtype=tf.float32)
    test_elbo = tf.keras.metrics.Mean(name=config.split + '_elbo',
                                      dtype=tf.float32)

    for i in range(config.num_epochs):
        # Reset the metrics at the start of the next epoch
        train_elbo.reset_states()
        test_elbo.reset_states()
        # Lists for predictions and labels
        train_predictions = []
        train_labels = []
        test_predictions = []
        test_labels = []

        # Loop over training set
        for imgs, tgts, lens, labs in train_ds:
            with tf.GradientTape() as tape:
                # Run the model to compute the ELBO objective and reconstructions
                if config.model == 'discvae':
                    elbo, infer_c, _, _, recons = model.run_model(
                        imgs,
                        tgts,
                        lens,
                        ckpt.step,
                        num_samples=config.num_samples)
                    train_predictions.extend(infer_c)
                    train_labels.extend(labs)
                elif config.model == 'vrnn':
                    elbo, _, _, recons = model.run_model(
                        imgs,
                        tgts,
                        lens,
                        ckpt.step,
                        num_samples=config.num_samples)
                elif config.model == 'gmvae':
                    elbo, infer_c, _, recons = model.run_model(
                        tgts, ckpt.step, num_samples=config.num_samples)
                    train_predictions.extend(infer_c)
                    train_labels.extend(labs)
                else:
                    elbo, _, recons = model.run_model(
                        tgts, ckpt.step, num_samples=config.num_samples)

                # Compute gradients of operations with respect to model variables
                grads = tape.gradient(-elbo, model.variables)
                # Maximise ELBO objective
                opt.apply_gradients(list(zip(grads, model.variables)))

                # Update metrics
                train_elbo.update_state(elbo)

                if (ckpt.step % config.summarise_every == 0):
                    # Transpose for summary visualisations
                    inputs_viz = tf.transpose(tgts, perm=[1, 0, 2, 3, 4])
                    recons_viz = tf.transpose(recons, perm=[1, 0, 2, 3, 4])

                    # Only take 4 example reconstructions
                    combined = tf.concat((inputs_viz[:4], recons_viz[:4]),
                                         axis=0)

                    utils.image_seq_summary(combined,
                                            'reconstructions',
                                            step=ckpt.step)

            # Increment global step
            ckpt.step.assign_add(1)

        # Loop over test set
        for imgs, tgts, lens, labs in test_ds:
            # Acquire test set metrics from computed loss tensors
            if config.model == 'discvae':
                elbo, infer_c, _, _, _ = model.run_model(
                    imgs, tgts, lens, num_samples=config.num_samples)
                test_predictions.extend(infer_c)
                test_labels.extend(labs)
            elif config.model == 'vrnn':
                elbo, _, _, _ = model.run_model(imgs,
                                                tgts,
                                                lens,
                                                num_samples=config.num_samples)
            elif config.model == 'gmvae':
                elbo, infer_c, _, _ = model.run_model(
                    tgts, num_samples=config.num_samples)
                test_predictions.extend(infer_c)
                test_labels.extend(labs)
            else:
                elbo, _, _ = model.run_model(tgts,
                                             num_samples=config.num_samples)

            test_elbo.update_state(elbo)

        # Logging phase
        if is_clustering:
            train_predictions_np = np.array(train_predictions)
            train_labels_np = np.array(train_labels)
            train_acc = utils.cluster_acc(train_predictions_np,
                                          train_labels_np[:, 0])

            test_predictions_np = np.array(test_predictions)
            test_labels_np = np.array(test_labels)
            test_acc = utils.cluster_acc(test_predictions_np,
                                         test_labels_np[:, 0])

            template = "Epoch {:d}, ELBO: {:.2f}, Test ELBO: {:.2f}, Acc: {:.2f}, Test Acc: {:.2f}"
            aggreg_results = [
                train_elbo.result(),
                test_elbo.result(), train_acc * 100, test_acc * 100
            ]
            print(
                template.format(int(ckpt.epoch), aggreg_results[0],
                                aggreg_results[1], aggreg_results[2],
                                aggreg_results[3]))
        else:
            template = "Epoch {:d}, ELBO: {:.2f}, Test ELBO: {:.2f}"
            aggreg_results = [train_elbo.result(), test_elbo.result()]
            print(
                template.format(int(ckpt.epoch), aggreg_results[0],
                                aggreg_results[1]))

        # Save aggregate summaries for logging stage
        with tf.name_scope('aggregates'):
            tf.summary.scalar('train_elbo', aggreg_results[0], step=ckpt.epoch)
            tf.summary.scalar(config.split + '_elbo',
                              aggreg_results[1],
                              step=ckpt.epoch)

            if is_clustering:
                tf.summary.scalar('train_acc',
                                  aggreg_results[2],
                                  step=ckpt.epoch)
                tf.summary.scalar(config.split + '_acc',
                                  aggreg_results[3],
                                  step=ckpt.epoch)

        # Checkpoint phase
        is_final_epoch = ((i + 1) == config.num_epochs)
        is_save_epoch = (i % config.save_every == 0)
        if is_save_epoch or is_final_epoch:
            save_path = manager.save()
            print("Saving checkpoint for step {}: {}".format(
                int(ckpt.step), save_path))

        # Increment epoch
        ckpt.epoch.assign_add(1)
        # Force flush the summary writer during training
        summary_writer.flush()
Example #8
def train_idec():

    model = IDEC(n_enc_1=500,
                 n_enc_2=500,
                 n_enc_3=1000,
                 n_dec_1=1000,
                 n_dec_2=500,
                 n_dec_3=500,
                 n_input=args.n_input,
                 n_z=args.n_z,
                 n_clusters=args.n_clusters,
                 alpha=1.0,
                 pretrain_path=args.pretrain_path).to(device)

    #  model.pretrain('data/ae_mnist.pkl')
    model.pretrain()

    train_loader = DataLoader(dataset,
                              batch_size=args.batch_size,
                              shuffle=False)
    optimizer = Adam(model.parameters(), lr=args.lr)

    # initialise cluster parameters
    data = dataset.x
    y = dataset.y
    data = torch.Tensor(data).to(device)
    x_bar, hidden = model.ae(data)

    kmeans = KMeans(n_clusters=args.n_clusters, n_init=20)
    y_pred = kmeans.fit_predict(hidden.data.cpu().numpy())
    nmi_k = nmi_score(y_pred, y)
    print("nmi score={:.4f}".format(nmi_k))

    hidden = None
    x_bar = None

    y_pred_last = y_pred
    model.cluster_layer.data = torch.tensor(kmeans.cluster_centers_).to(device)

    model.train()
    for epoch in range(100):

        if epoch % args.update_interval == 0:

            _, tmp_q = model(data)

            # update target distribution p
            tmp_q = tmp_q.data
            p = target_distribution(tmp_q)

            # evaluate clustering performance
            y_pred = tmp_q.cpu().numpy().argmax(1)
            delta_label = np.sum(y_pred != y_pred_last).astype(
                np.float32) / y_pred.shape[0]
            y_pred_last = y_pred

            acc = cluster_acc(y, y_pred)
            nmi = nmi_score(y, y_pred)
            ari = ari_score(y, y_pred)
            print('Iter {}'.format(epoch), ':Acc {:.4f}'.format(acc),
                  ', nmi {:.4f}'.format(nmi), ', ari {:.4f}'.format(ari))

            if epoch > 0 and delta_label < args.tol:
                print('delta_label {:.4f}'.format(delta_label), '< tol',
                      args.tol)
                print('Reached tolerance threshold. Stopping training.')
                break
        for batch_idx, (x, _, idx) in enumerate(train_loader):

            x = x.to(device)
            idx = idx.to(device)

            x_bar, q = model(x)

            reconstr_loss = F.mse_loss(x_bar, x)
            kl_loss = F.kl_div(q.log(), p[idx])
            loss = args.gamma * kl_loss + reconstr_loss

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
Example #9
            logwriter = csv.DictWriter(
                logfile, fieldnames=['iter', 'acc', 'nmi', 'ari', 'L'])
            logwriter.writeheader()

            loss = 0
            idx = 0
            t0 = time()
            for ite in range(int(args.maxiter)):
                if ite % args.update_interval == 0:
                    q = model.predict_generator(AE_generator, verbose=1)
                    p = target_distribution(
                        q)  # update the auxiliary target distribution p
                    print(p.shape)
                    # evaluate the clustering performance
                    y_pred = q.argmax(1)
                    if y_true is not None:
                        acc = np.round(cluster_acc(y_true, y_pred), 5)
                        nmi = np.round(
                            metrics.normalized_mutual_info_score(
                                y_true, y_pred), 5)
                        ari = np.round(
                            metrics.adjusted_rand_score(y_true, y_pred), 5)
                        loss = np.round(loss, 5)
                        logwriter.writerow(
                            dict(iter=ite, acc=acc, nmi=nmi, ari=ari, L=loss))
                        print(
                            'Iter-%d: ACC= %.4f, NMI= %.4f, ARI= %.4f;  L= %.5f'
                            % (ite, acc, nmi, ari, loss))

                    # check stop criterion
                    # When delta_label ==0 is because y_pred = y_pred_last. (It was no improvement)
                    delta_label = np.sum(y_pred != y_pred_last).astype(
                        np.float32) / y_pred.shape[0]
Example #10
    def fit(self,
            trainloader,
            validloader,
            path,
            lr=0.001,
            num_epochs=10,
            anneal=False,
            tol=0.0005):
        labels_changed = 1
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            self.cuda()
        print("=====Fitting the model......Patience=======")
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      self.parameters()),
                               lr=lr)
        train_error = []
        for epoch in range(num_epochs):
            # train 1 epoch
            self.train()
            epoch_lr = lr
            if anneal:
                epoch_lr = adjust_learning_rate(lr, optimizer, epoch)
            train_loss = 0
            loop = tqdm(trainloader)
            for batch_idx, (inputs, _, _) in enumerate(loop):
                if use_cuda:
                    inputs = inputs.cuda()
                optimizer.zero_grad()
                inputs = Variable(inputs)

                z, outputs, mu, logvar = self.forward(inputs)
                loss = self.loss_function(outputs, inputs, z, mu, logvar)
                train_loss += loss.item()
                loss.backward()
                optimizer.step()

            self.eval()
            Y = []
            Y_pred = []
            probabilities = []
            for batch_idx, (inputs, labels, _) in enumerate(validloader):
                if use_cuda:
                    inputs = inputs.cuda()
                inputs = Variable(inputs)
                z, outputs, mu, logvar = self.forward(inputs)

                q_c_x = self.compute_gamma(z).data.cpu().numpy()
                probabilities.append(q_c_x)
                Y.append(labels.numpy())
                Y_pred.append(np.argmax(q_c_x, axis=1))

            Y = np.concatenate(Y)
            Y_pred = np.concatenate(Y_pred)
            if epoch != 0:
                labels_changed = np.sum(Y_pred != self.Y_pred_last).astype(
                    np.float32) / Y_pred.shape[0]
            self.Y_pred_last = Y_pred
            if labels_changed < tol:
                self.convergence_iter += 1
            else:
                self.convergence_iter = 0
            acc = cluster_acc(Y_pred, Y)
            # valid_loss = total_loss / total_num
            print("#Epoch %3d: lr: %.5f, Train Loss: %.5f, acc: %.5f" %
                  (epoch, epoch_lr, train_loss / len(trainloader.dataset),
                   acc[0]))
            train_error.append(train_loss / len(trainloader.dataset))
            self.save_model(path)
            if self.convergence_iter >= 5:
                print(
                    'percentage of labels changed {:.4f}'.format(
                        labels_changed), '< tol', tol)
                print('Reached Convergence threshold. Stopping training.')
                break
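`adjust_learning_rate` in Example #10 is not shown. A common pattern it may follow (the decay factor and schedule below are assumptions) is a step decay that writes the new rate into the optimizer's parameter groups and returns it:

def adjust_learning_rate(init_lr, optimizer, epoch, decay=0.9, every=10):
    # step decay: multiply the learning rate by `decay` every `every` epochs
    lr = init_lr * (decay ** (epoch // every))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr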
Example #11
        test_indices = [
            np.where(specs.test_indices[i] == indices)[0][0]
            for i in range(len(specs.test_indices))
        ]

        # Select only the labels which are to be used in the evaluation (disjoint for validation and test)
        validation_target = np.asarray([target[i] for i in validation_indices])
        test_target = np.asarray([target[i] for i in test_indices])

        # Split the cluster assignments for validation and test sets
        validation_cluster_assign = np.asarray(
            [kmeans_model.labels_[i] for i in validation_indices])
        test_cluster_assign = np.asarray(
            [kmeans_model.labels_[i] for i in test_indices])

        # Evaluate the clustering validation performance using the ground-truth labels
        validation_acc = cluster_acc(validation_target,
                                     validation_cluster_assign)
        print("Validation ACC", validation_acc)
        validation_ari = adjusted_rand_score(validation_target,
                                             validation_cluster_assign)
        print("Validation ARI", validation_ari)
        validation_nmi = normalized_mutual_info_score(
            validation_target, validation_cluster_assign)
        print("Validation NMI", validation_nmi)

        # Evaluate the clustering test performance using the ground-truth labels
        test_acc = cluster_acc(test_target, test_cluster_assign)
        print("Test ACC", test_acc)
        test_ari = adjusted_rand_score(test_target, test_cluster_assign)
        print("Test ARI", test_ari)
        test_nmi = normalized_mutual_info_score(test_target,
                                                test_cluster_assign)
        print("Test NMI", test_nmi)
Example #12

X_train = X_train.reshape((X_train.shape[0], -1))

ae = AutoEncoder(AE_NET, EMBEDDING_SIZE, SEED)
dkmeans = DeepKMeans(ae, K, seed=SEED)
kmeans = KMeans(n_clusters=K, init="k-means++", random_state=SEED)

logdir = "logs/"
file_writer = tf.summary.create_file_writer(logdir, flush_millis=10000)
file_writer.set_as_default()

dkmeans.fit(X_train,
            BATCH_SIZE,
            PRETRAIN_EPOCHS,
            FINETUNE_EPOCH,
            UPDATE_EPOCH,
            LEARNING_RATE,
            LEARNING_RATE,
            seed=SEED,
            verbose=True)
kmeans.fit(X_train)

cls_dkm, _ = dkmeans(X_train)
cls_km = kmeans.predict(X_train)

print("K-means")
print("   ACC: ", cluster_acc(y_train, cls_km))
print("   NMI: ", normalized_mutual_info_score(y_train, cls_km))
print("Deep K-means")
print("   ACC: ", cluster_acc(y_train, cls_dkm.numpy()))
print("   NMI: ", normalized_mutual_info_score(y_train, cls_dkm.numpy()))
Example #13
 def _evaluate(pred_batches, labels):
     preds = np.hstack(pred_batches)
     truths = labels[:preds.size]
     acc, _ = cluster_acc(preds, truths)
     nmi = adjusted_mutual_info_score(truths, labels_pred=preds)
     return acc, nmi
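Every example on this page calls `cluster_acc`; Example #13 also unpacks a second return value. A typical implementation, sketched here with the Hungarian algorithm from SciPy (the exact return convention of the original helper may differ), matches predicted clusters to ground-truth labels and returns the best achievable accuracy plus the matching:

import numpy as np
from scipy.optimize import linear_sum_assignment

def cluster_acc(y_true, y_pred):
    y_true = np.asarray(y_true).astype(np.int64)
    y_pred = np.asarray(y_pred).astype(np.int64)
    n_classes = max(y_pred.max(), y_true.max()) + 1
    # confusion-style weight matrix between predicted clusters and true labels
    w = np.zeros((n_classes, n_classes), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    # Hungarian matching maximises the total weight of matched pairs
    row_ind, col_ind = linear_sum_assignment(w.max() - w)
    acc = w[row_ind, col_ind].sum() / y_pred.size
    return acc, list(zip(row_ind, col_ind))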
Example #14
    def fit(self, feat, labels=None):
        feat = feat.astype(np.float32)
        batch_size = self.batch_size
        data_size = feat.shape[0]
        count = {i: 0 for i in range(self.n_clusters)}

        hidden_feat = self.get_hidden_features(feat,
                                               self.net,
                                               self.hidden_dim,
                                               batch_size=self.batch_size,
                                               use_cuda=self.use_cuda)
        idx, centers = self.init_cluster(hidden_feat,
                                         n_clusters=self.n_clusters)
        last_pred = idx[:]
        if labels is not None:
            acc = cluster_acc(labels, idx)
            print('KMeans pretraining acc is {}'.format(acc))

        # optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        # optimizer = optim.ASGD(self.net.parameters(), lr=self.lr)
        optimizer = optim.SGD(self.net.parameters(), lr=self.lr, momentum=0.9)

        for epoch in range(self.max_epochs):
            if False:
                if epoch < 10:
                    count = {i: 50 for i in range(self.n_clusters)}
            for index in range(0, data_size, batch_size):
                feat_batch = Variable(
                    torch.from_numpy(feat[index:index + batch_size]))
                idx_batch = idx[index:index + batch_size]
                centers_batch = Variable(torch.from_numpy(centers[idx_batch]))
                if self.use_cuda:
                    feat_batch = feat_batch.cuda()
                    centers_batch = centers_batch.cuda()
                optimizer.zero_grad()
                hidden_batch, output_batch = self.net(feat_batch)
                recons_loss = F.mse_loss(output_batch, feat_batch)
                cluster_loss = F.mse_loss(hidden_batch, centers_batch)
                loss = self.recons_lam * recons_loss + self.cluster_lam * cluster_loss
                loss.backward()
                optimizer.step()
                hidden_batch2, _ = self.net(feat_batch)
                hidden_batch2 = hidden_batch2.cpu().data.numpy()
                tmp_idx_batch, centers, count = self.batch_km(
                    hidden_batch2, centers, count)
                idx[index:index + batch_size] = tmp_idx_batch

            hidden_feat = self.get_hidden_features(feat,
                                                   self.net,
                                                   self.hidden_dim,
                                                   batch_size=self.batch_size,
                                                   use_cuda=self.use_cuda)
            idx, centers = self.init_cluster(hidden_feat,
                                             n_clusters=self.n_clusters,
                                             init_centers=centers)
            acc = None
            if labels is not None:
                acc = cluster_acc(labels, idx)
            if self.verbose:
                print('Epoch {} end, current acc is {}'.format(epoch + 1, acc))
            if self.whether_convergence(last_pred, idx, self.tol):
                print('End Iter')
                break
            else:
                last_pred = idx[:]
        self.centenrs = centers
Example #15
    def fit(self, feat, seeds_dict, labels=None):
        assert len(seeds_dict) <= self.n_clusters

        feat = feat.astype(np.float32)
        batch_size = self.batch_size
        data_size = feat.shape[0]
        count = {i: 0 for i in range(self.n_clusters)}

        seed_masks = self.get_mask(seeds_dict, data_size)
        seed_labels = self.get_seed_labels(seeds_dict, data_size)
        hidden_feat = self.get_hidden_features(feat, self.net, self.hidden_dim, batch_size=self.batch_size, use_cuda=self.use_cuda)
        if True:
            seed_centers = self.get_seed_centers(self.n_clusters, seeds_dict, hidden_feat)
        else:
            seed_centers = None
        # idx, centers = self.init_cluster(hidden_feat, n_clusters=self.n_clusters)
        idx, centers = self.init_cluster(hidden_feat, n_clusters=self.n_clusters, init_centers=seed_centers)
        last_pred = idx[:]
        if labels is not None:
            acc = cluster_acc(labels, idx)
            print('KMeans pretraining acc is {}'.format(acc))
        for i in range(data_size):
            if seed_masks[i] == 1:
                idx[i] = seed_labels[i]

        if False:
            # align
            tmp_seed_labels = seed_labels[seed_masks.astype(np.bool)]
            tmp_idx = np.array(idx)[seed_masks.astype(np.bool)]
            tmp_mapping = align_labels(tmp_seed_labels, tmp_idx)
            tmp_idx = [tmp_mapping[i] for i in idx]
            tmp_range = [tmp_mapping[i] for i in range(self.n_clusters)]
            tmp_centers = centers[np.array(tmp_range)]
            centers = tmp_centers
            idx = tmp_idx
            if labels is not None:
                idx = np.array(idx)
                print(idx.size)
                print(labels.size)
                acc = cluster_acc(labels, idx)
                print('KMeans pretraining acc is {}'.format(acc))
            ###########################


        # optimizer = optim.Adam(self.net.parameters(), lr=self.lr)
        # optimizer = optim.ASGD(self.net.parameters(), lr=self.lr)
        optimizer = optim.SGD(self.net.parameters(), lr=self.lr, momentum=0.9)

        for epoch in range(self.max_epochs):
            for index in range(0, data_size, batch_size):
                feat_batch = Variable(torch.from_numpy(feat[index: index+batch_size]))
                idx_batch = idx[index: index+batch_size]

                mask_batch = Variable(torch.from_numpy(seed_masks[index: index+batch_size]))
                seeds_labels_batch = seed_labels[index: index+batch_size]

                centers_batch = Variable(torch.from_numpy(centers[idx_batch]))

                seeds_centers_batch = Variable(torch.from_numpy(centers[seeds_labels_batch]))

                if self.use_cuda:
                    feat_batch = feat_batch.cuda()
                    centers_batch = centers_batch.cuda()
                    mask_batch = mask_batch.cuda()
                    seeds_centers_batch = seeds_centers_batch.cuda()

                optimizer.zero_grad()
                hidden_batch, output_batch = self.net(feat_batch)
                recons_loss = F.mse_loss(output_batch, feat_batch)
                cluster_loss = F.mse_loss(hidden_batch, centers_batch)

                seed_loss = torch.mean(mask_batch * torch.norm(hidden_batch - seeds_centers_batch, p=2, dim=1))

                # loss = self.recons_lam * recons_loss + self.cluster_lam * cluster_loss + seed_loss
                loss = self.recons_lam * recons_loss + self.cluster_lam * cluster_loss
                loss.backward()
                optimizer.step()
                hidden_batch2, _ = self.net(feat_batch)
                hidden_batch2 = hidden_batch2.cpu().data.numpy()
                # tmp_idx_batch, centers, count = self.batch_km(hidden_batch2, centers, count)
                tmp_idx_batch, centers, count = self.batch_km_seed(hidden_batch2, centers, count, mask_batch.cpu().data.numpy(), seeds_labels_batch)
                idx[index: index+batch_size] = tmp_idx_batch

            hidden_feat = self.get_hidden_features(feat, self.net, self.hidden_dim, batch_size=self.batch_size, use_cuda=self.use_cuda)
            idx, centers = self.init_cluster(hidden_feat, n_clusters=self.n_clusters, init_centers=centers)
            acc = None
            if labels is not None:
                acc = cluster_acc(labels, idx)
            if self.verbose:
                print('Epoch {} end, current acc is {}'.format(epoch + 1, acc))
            if self.whether_convergence(last_pred, idx, self.tol):
                print('End Iter')
                break
            else:
                last_pred = idx[:]
        self.centenrs = centers
Example #16
 def update_cluster_acc(self):
     self.current_cluster_acc = cluster_acc(
         np.array(self.corpus_loader.train_labels),
         self.current_pred_labels)
Example #17
    def cluster(self,
                args,
                x_data,
                y_data=None,
                test="train",
                tol=0.01,
                iter_max=1e6,
                **kwargs):

        save_path = os.path.join(args.save_weight_path,
                                 "dec_weights_{}.h5".format(args.dataset))

        if os.path.isfile(save_path):
            self.dec_model.load_weights(save_path)
            print('Restored Model weight')

        if test == "test":
            y_pred = self.dec_model.predict(x_data, verbose=0).argmax(1)
            acc = utils.cluster_acc(y_data, y_pred)
            print('Accuracy ' + str(np.round(acc, 5)))
            return

        update_interval = x_data.shape[0] // self.batch_size
        print('Update interval', update_interval)

        train = True
        iteration, index = 0, 0
        current_acc = 0
        self.accuracy = 0

        while train:
            sys.stdout.write('\r')
            # cut off iteration
            if iter_max < iteration:
                print('Reached maximum iteration limit. Stopping training.')
                return self.y_pred

            # update (or initialize) probability distributions and propagate weight changes
            # from DEC model to encoder.
            if iteration % update_interval == 0:
                self.q = self.dec_model.predict(x_data, verbose=0)
                self.p = self.p_mat(self.q)

                y_pred = self.q.argmax(1)
                delta_label = np.sum(y_pred != self.y_pred).astype(
                    np.float32) / y_pred.shape[0]
                if y_data is not None:
                    current_acc = utils.cluster_acc(y_data, y_pred)
                    print('Iteration ' + str(iteration) + ', Accuracy ' +
                          str(np.round(current_acc, 5)))

                else:
                    print(
                        str(np.round(delta_label * 100, 5)) +
                        '% change in label assignment')

                if delta_label < tol:
                    print('Reached tolerance threshold.')
                    train = False
                    continue
                else:
                    self.y_pred = y_pred

                # propagate weights to the encoder and save a checkpoint when accuracy improves
                if self.accuracy < current_acc:
                    for i in range(len(self.encoder.layers)):
                        self.encoder.layers[i].set_weights(
                            self.dec_model.layers[0].layers[i].get_weights())
                    self.cluster_centroid = self.dec_model.layers[
                        -1].get_weights()[0]

                    # save checkpoint
                    self.dec_model.save(save_path)
                    self.accuracy = current_acc
                    print("update weight and save checkpoint")

            # train on batch
            sys.stdout.write('Iteration %d, ' % iteration)
            if (index + 1) * self.batch_size > x_data.shape[0]:
                loss = self.dec_model.train_on_batch(
                    x_data[index * self.batch_size::],
                    self.p[index * self.batch_size::])
                index = 0
                sys.stdout.write('Loss %f' % loss)
            else:
                loss = self.dec_model.train_on_batch(
                    x_data[index * self.batch_size:(index + 1) *
                           self.batch_size],
                    self.p[index * self.batch_size:(index + 1) *
                           self.batch_size])
                sys.stdout.write('Loss %f' % loss)
                index += 1

            iteration += 1
            sys.stdout.flush()

        return
Example #18
     text_idec_model = Text_IDEC(root_dir=root_dir + '/tfidf_i',
                                 update_interval=10,
                                 n_clusters=n_clusters,
                                 use_tensorboard=True,
                                 use_vat=False,
                                 id=4,
                                 semi_supervised=False,
                                 split_sents=True,
                                 use_ae=use_ae,
                                 fd_hidden_dim=cfg.HIDDEN_DIMS[-1])
     text_idec_model.clustering()
     print('Total acc is {}'.format(
         text_idec_model.current_cluster_acc))
     pred = np.array(text_idec_model.current_pred_labels)
     labels = np.array(text_idec_model.corpus_loader.train_labels)
     acc = cluster_acc(labels, pred)
     nmi = normalized_mutual_info_score(labels, pred)
     ari = adjusted_rand_score(labels, pred)  # adjusted Rand index, matching the variable name
     all_pred.append(pred.tolist())
     all_acc.append(acc)
     all_nmi.append(nmi)
     all_ari.append(ari)
     if acc > best_acc:
         best_pred = pred
         best_acc = acc
 print('{} best acc is {}'.format(feat_name, best_acc))
 pred_std = np.std(all_acc)
 pred_mean = np.mean(all_acc)
 dump_mongo(corpora=corpora_name,
            feat_name=feat_name,
            n_topics=n_clusters,