Example #1
0
def main():
    """Train a recurrent VADER model on the prepared data and print the clusters."""
    output_path = Path('../output/try2_exactly_7_times')
    # parents=True so the run does not fail when ../output does not exist yet
    output_path.mkdir(parents=True, exist_ok=True)
    save_path = output_path / 'vader.ckpt'

    w_train, x_train, names = read_data()
    # z-score normalization using the global mean/std of the whole tensor
    x_train = (x_train - np.mean(x_train)) / np.std(x_train)

    vader = VADER(x_train=x_train,
                  w_train=w_train,
                  save_path=save_path,
                  n_hidden=[128, 32],
                  k=5,
                  learning_rate=1e-3,
                  output_activation=None,
                  recurrent=True,
                  batch_size=8,
                  alpha=0.1)
    # pre-train without latent loss
    vader.pre_fit(n_epoch=20, verbose=True)
    # train with latent loss
    vader.fit(n_epoch=100, verbose=True)
    # get the clusters
    c = vader.cluster(x_train, w_train)
    # get the re-constructions
    p = vader.predict(x_train)

    print(vader.get_clusters_on_x())
Example #2
0
def test1():
    """Smoke-test VADER training on prepared data, dumping the inputs for inspection."""
    save_path = os.path.join('test_vader', 'vader.ckpt')
    x_train, y_train, w_train = prepare_data()
    # Use context managers so the dump files are flushed and closed even if
    # pickling raises (the original left the file objects unclosed).
    with open("x_train.pickle", "wb") as f:
        pickle.dump(x_train, f)
    with open("y_train.pickle", "wb") as f:
        pickle.dump(y_train, f)
    with open("w_train.pickle", "wb") as f:
        pickle.dump(w_train, f)
    # Note: y_train is used purely for monitoring performance when a ground truth clustering is available.
    # It can be omitted if no ground truth is available.
    vader = VADER(x_train=x_train,
                  w_train=w_train,
                  y_train=y_train,
                  save_path=save_path,
                  n_hidden=[12, 2],
                  k=4,
                  learning_rate=1e-3,
                  output_activation=None,
                  recurrent=True,
                  batch_size=16)
    # pre-train without latent loss
    vader.pre_fit(n_epoch=50, verbose=True)
    # train with latent loss
    vader.fit(n_epoch=50, verbose=True)
    # get the clusters
    c = vader.cluster(x_train)
    # get the re-constructions
    p = vader.predict(x_train)
Example #3
0
    def _fit_vader(self, X_train: ndarray,
                   W_train: Optional[ndarray]) -> VADER:
        """Build a VADER model from the tuned hyper-parameters and train it."""
        params = self.params_dict

        # noinspection PyTypeChecker
        model = VADER(X_train=X_train,
                      W_train=W_train,
                      save_path=None,
                      n_hidden=params["n_hidden"],
                      k=params["k"],
                      seed=self.seed,
                      learning_rate=params["learning_rate"],
                      recurrent=True,
                      batch_size=params["batch_size"],
                      alpha=params["alpha"])

        # short warm-up without the latent loss, then the full fit
        model.pre_fit(n_epoch=10, verbose=False)
        model.fit(n_epoch=self.n_epoch,
                  verbose=False,
                  early_stopping_ratio=self.early_stopping_ratio,
                  early_stopping_batch_size=self.early_stopping_batch_size)
        return model
Example #4
0
    def test_vader_save_load(self):
        """Clustering must be identical before saving and after re-loading the model."""
        save_path = "test_vader_save_load"
        if os.path.exists(save_path):
            shutil.rmtree(save_path)

        X_train, W_train, y_train = generate_x_w_y(7, 400)
        try:
            # noinspection PyTypeChecker
            vader = VADER(X_train=X_train,
                          W_train=W_train,
                          y_train=y_train,
                          save_path=save_path,
                          n_hidden=[12, 2],
                          k=4,
                          learning_rate=1e-3,
                          output_activation=None,
                          recurrent=True,
                          batch_size=16)
            vader.pre_fit(n_epoch=10, verbose=True)
            vader.fit(n_epoch=10, verbose=True)
            clustering_before_loading = vader.cluster(X_train)

            loaded_vader = VADER.load_model(save_path, X_train, W_train, y_train)
            clustering_after_loading = loaded_vader.cluster(X_train)
        finally:
            # remove the checkpoint directory even when training or loading raises
            if os.path.exists(save_path):
                shutil.rmtree(save_path)

        assert list(clustering_before_loading) == list(
            clustering_after_loading)
Example #5
0
    def test_vader_recur(self):
        """End-to-end smoke test of the recurrent VADER: cluster, predict, loss."""
        X_train, W_train, y_train = generate_x_w_y(7, 400)
        # y_train only monitors performance against a known ground truth;
        # it could be left out entirely.
        # noinspection PyTypeChecker
        model = VADER(X_train=X_train,
                      W_train=W_train,
                      y_train=y_train,
                      save_path=None,
                      n_hidden=[12, 2],
                      k=4,
                      learning_rate=1e-3,
                      output_activation=None,
                      recurrent=True,
                      batch_size=16)

        model.pre_fit(n_epoch=10, verbose=True)  # warm-up, latent loss disabled
        model.fit(n_epoch=10, verbose=True)      # full training with latent loss

        # cluster assignments: one non-trivial label per training sample
        labels = model.cluster(X_train)
        assert any(labels)
        assert len(labels) == len(X_train)

        # reconstructions must keep the input shape
        reconstruction = model.predict(X_train)
        assert reconstruction.shape == X_train.shape

        # both loss components must be present and non-negative
        losses = model.get_loss(X_train)
        assert losses
        for key in ("reconstruction_loss", "latent_loss"):
            assert key in losses
            assert losses[key] >= 0
Example #6
0
    def test_vader_save_load_transfer_learning(self):
        """Train a model, persist its weights, then warm-start a fresh model from them."""
        save_folder = "test_vader_save_load_transfer_learning"
        # os.path.join instead of the literal "//" separator: portable and normalized
        save_path = os.path.join(save_folder, "weights")

        if os.path.exists(save_folder):
            shutil.rmtree(save_folder)

        try:
            X_train, W_train, y_train = generate_x_w_y(7, 400)
            # noinspection PyTypeChecker
            vader = VADER(X_train=X_train,
                          W_train=W_train,
                          y_train=y_train,
                          save_path=save_path,
                          n_hidden=[12, 2],
                          k=4,
                          learning_rate=1e-3,
                          output_activation=None,
                          recurrent=True,
                          batch_size=16)
            vader.pre_fit(n_epoch=10, verbose=True)
            vader.fit(n_epoch=10, verbose=True)
            clustering_before_loading = vader.cluster(X_train)

            # second model: fresh data, weights warm-started from the saved checkpoint
            X_train_ft, W_train_ft, y_train_ft = generate_x_w_y(7, 400)
            vader = VADER(X_train=X_train_ft,
                          W_train=W_train_ft,
                          y_train=y_train_ft,
                          save_path=None,
                          n_hidden=[12, 2],
                          k=4,
                          learning_rate=1e-3,
                          output_activation=None,
                          recurrent=True,
                          batch_size=16)
            vader.load_weights(save_path)
            vader.pre_fit(n_epoch=10, verbose=True)
            vader.fit(n_epoch=10, verbose=True)
            # get the clusters
            clustering = vader.cluster(X_train_ft)
        finally:
            # clean up checkpoints even if training or loading raises
            if os.path.exists(save_folder):
                shutil.rmtree(save_folder)

        assert any(clustering)
        assert len(clustering) == len(X_train_ft)
        # get the re-constructions
        prediction = vader.predict(X_train_ft)
        assert prediction.shape == X_train_ft.shape
        # compute the loss given the network
        loss = vader.get_loss(X_train_ft)
        assert loss
        assert "reconstruction_loss" in loss
        assert "latent_loss" in loss
        assert loss["reconstruction_loss"] >= 0
        assert loss["latent_loss"] >= 0
Example #7
0
 def test_vader_nonrecur(self):
     """Smoke test of the non-recurrent VaDER (ordinary VAE with a GM prior)."""
     n_timepoints = 7
     X_train, y_train = generate_x_y_for_nonrecur(n_timepoints, 400)
     # noinspection PyTypeChecker
     model = VADER(X_train=X_train,
                   y_train=y_train,
                   n_hidden=[12, 2],
                   k=2,
                   learning_rate=1e-3,
                   output_activation=None,
                   recurrent=False,
                   batch_size=16)
     model.pre_fit(n_epoch=10, verbose=True)  # pre-train without latent loss
     model.fit(n_epoch=10, verbose=True)      # train with latent loss

     # cluster assignments: one non-trivial label per training sample
     labels = model.cluster(X_train)
     assert any(labels)
     assert len(labels) == len(X_train)

     # reconstructions must keep the input shape
     reconstruction = model.predict(X_train)
     assert reconstruction.shape == X_train.shape

     # both loss components must be present and non-negative
     losses = model.get_loss(X_train)
     assert losses
     for key in ("reconstruction_loss", "latent_loss"):
         assert key in losses
         assert losses[key] >= 0

     # draw a handful of samples from the fitted generative model
     n_samples = 10
     generated = model.generate(n_samples)
     assert generated
     assert "clusters" in generated
     assert "samples" in generated
     assert len(generated["clusters"]) == n_samples
     assert generated["samples"].shape == (n_samples, n_timepoints)
Example #8
0
    def test_vader_transfer_learning(self):
        """Fit once, swap in fresh inputs via set_inputs, and fine-tune the same model."""
        X_train, W_train, y_train = generate_x_w_y(7, 400)
        # noinspection PyTypeChecker
        model = VADER(X_train=X_train,
                      W_train=W_train,
                      y_train=y_train,
                      save_path=None,
                      n_hidden=[12, 2],
                      k=4,
                      learning_rate=1e-3,
                      output_activation=None,
                      recurrent=True,
                      batch_size=16)
        model.pre_fit(n_epoch=10, verbose=True)  # pre-train without latent loss
        model.fit(n_epoch=10, verbose=True)      # train with latent loss

        # transfer-learning step: same weights, new data
        X_train_ft, W_train_ft, y_train_ft = generate_x_w_y(7, 400)
        model.set_inputs(X_train_ft, W_train_ft, y_train_ft)
        model.pre_fit(n_epoch=10, verbose=True)
        model.fit(n_epoch=10, verbose=True)

        # cluster assignments: one non-trivial label per fine-tuning sample
        labels = model.cluster(X_train_ft)
        assert any(labels)
        assert len(labels) == len(X_train_ft)

        # reconstructions must keep the input shape
        reconstruction = model.predict(X_train_ft)
        assert reconstruction.shape == X_train_ft.shape

        # both loss components must be present and non-negative
        losses = model.get_loss(X_train_ft)
        assert losses
        for key in ("reconstruction_loss", "latent_loss"):
            assert key in losses
            assert losses[key] >= 0
Example #9
0
def test2():
    """Smoke-test the non-recurrent VADER on the second test dataset."""
    x_train, y_train = get_dete_for_seconed_test()
    vader = VADER(x_train=x_train,
                  y_train=y_train,
                  n_hidden=[12, 2],
                  k=2,
                  learning_rate=1e-3,
                  output_activation=None,
                  recurrent=False,
                  batch_size=16)
    # pre-train without latent loss
    vader.pre_fit(n_epoch=50, verbose=True)
    # train with latent loss
    vader.fit(n_epoch=50, verbose=True)
    # get the clusters
    c = vader.cluster(x_train)
    # get the re-constructions
    p = vader.predict(x_train)
    # compute the loss given the network
    # (the original computed this twice with an ambiguous name `l`; once is enough)
    loss = vader.get_loss(x_train)
    # generate some samples
    g = vader.generate(10)
Example #10
0
     # Per-repeat RNG seed derived from the CLI seed and the loop indices.
     # NOTE(review): this value is never used below — VADER receives args.seed,
     # so every repeat trains with the same seed; presumably `seed=seed` was
     # intended. Confirm against the enclosing loop (not visible here).
     seed = f"{args.seed}{i}{j}" if args.seed else None
     # noinspection PyTypeChecker
     vader = VADER(X_train=input_data,
                   W_train=input_weights,
                   k=args.k,
                   n_hidden=n_hidden,
                   learning_rate=args.learning_rate,
                   batch_size=args.batch_size,
                   alpha=args.alpha,
                   seed=args.seed,
                   save_path=args.save_path,
                   output_activation=None,
                   recurrent=True)
     # short warm-up without the latent loss, then the full fit with early stopping
     vader.pre_fit(n_epoch=10, verbose=False)
     vader.fit(n_epoch=args.n_epoch,
               verbose=False,
               early_stopping_ratio=args.early_stopping_ratio,
               early_stopping_batch_size=args.early_stopping_batch_size)
     # record this repeat's training curve in the shared PDF report
     fig = plot_loss_history(vader, model_name=f"Model #{j}")
     loss_history_pdf.savefig(fig)
     # noinspection PyTypeChecker
     clustering = vader.cluster(input_data, input_weights)
     # effective k = number of clusters that actually received members
     effective_k = len(Counter(clustering))
     y_pred_repeats.append(clustering)
     effective_k_repeats.append(effective_k)
     # final-epoch losses of this repeat
     train_reconstruction_loss_repeats.append(
         vader.reconstruction_loss[-1])
     train_latent_loss_repeats.append(vader.latent_loss[-1])
 # the mean effective k over all repeats decides the consensus cluster count
 effective_k = np.mean(effective_k_repeats)
 num_of_clusters = round(float(effective_k))
 clustering = ClusteringUtils.consensus_clustering(
     y_pred_repeats, num_of_clusters)
Example #11
0
# It can be omitted if no ground truth is available.
vader = VADER(X_train=X_train,
              W_train=W_train,
              y_train=y_train,
              save_path=save_path,
              n_hidden=[12, 2],
              k=4,
              learning_rate=1e-3,
              output_activation=None,
              recurrent=True,
              batch_size=16)

# pre-train without latent loss
vader.pre_fit(n_epoch=50, verbose=True)
# train with latent loss
vader.fit(n_epoch=50, verbose=True)
# get the clusters
c = vader.cluster(X_train)
# get the re-constructions
p = vader.predict(X_train)
# compute the loss given the network
# NOTE(review): `l` is an ambiguous name (PEP 8 discourages it); left unchanged
# because it is a module-level binding that may be read further down the script.
l = vader.get_loss(X_train)

# Run VaDER non-recurrently (ordinary VAE with GM prior)
# Synthetic data: two clusters of ns samples each, drawn from nt-dimensional
# Gaussians with means -1 and +1 and diagonal covariance 2*I.
nt = int(8)
ns = int(2e2)
sigma = np.diag(np.repeat(2, nt))
mu1 = np.repeat(-1, nt)
mu2 = np.repeat(1, nt)
a1 = np.random.multivariate_normal(mu1, sigma, ns)
a2 = np.random.multivariate_normal(mu2, sigma, ns)