def __main_gm__():
    # GeoParser, adapt_datasets and run settings such as example, nets, geo_ids,
    # batch_size, a_dim and initial_lr are assumed to be defined at module level.
    meta_df = None
    unlabelled_meta_df = None
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
    from utils.files_destinations import home_path, destination_folder, data_folder, results_folder, \
        plots_folder_path

    examples_list = ["mnist", "cifar10", "cifar100"]
    if example not in examples_list and not import_local_file and import_geo:
        g = GeoParser(home_path=home_path, geo_ids=geo_ids, unlabelled_geo_ids=unlabelled_geo_ids,
                      bad_geo_ids=bad_geo_ids)
        g.get_geo(load_from_disk=load_from_disk)
        meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=True)
        unlabelled_meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=False)
        if translate == "y":  # == instead of `is`: strings must be compared by value
            for geo_id in geo_ids:
                g.translate_indices_df(geo_id, labelled=True)
            for geo_id in unlabelled_geo_ids:
                g.translate_indices_df(geo_id, labelled=False)
        meta_df, unlabelled_meta_df = adapt_datasets(meta_df, unlabelled_meta_df)

    is_example = False
    from utils.list_parameters import z_dims, h_dims, num_elements
    dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=a_dim,
                                       num_elements=num_elements, use_conv=use_conv)
    dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=initial_lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
    if meta_df is not None:
        dgm.import_dataframe(meta_df, batch_size, labelled=True)
        if has_unlabelled:
            dgm.import_dataframe(unlabelled_meta_df, batch_size, labelled=False)
        else:
            dgm.import_dataframe(meta_df, batch_size, labelled=False)
    elif example in examples_list:
        dgm.load_example_dataset(dataset=example, batch_size=batch_size, labels_per_class=400)
        is_example = True

    print("PCA saved at: ", plots_folder_path)
    # meta_df = pd.DataFrame(dgm.train_ds)
    # ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="pca",
    #              images_folder_path=plots_folder_path,
    #              info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))

    dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
    dgm.set_data(labels_per_class=labels_per_class, is_example=is_example)
    dgm.cuda()
    if use_conv:
        dgm.set_conv_adgm_layers(is_hebb_layers=False, input_shape=img_shape)
    else:
        dgm.set_adgm_layers(h_dims=h_dims_classifier)

    # Import the M1 in the M1+M2 model (Kingma et al., 2014)
    if load_vae:
        print("Importing the model: ", dgm.model_file_name)
        if use_conv:
            dgm.import_cvae()
        else:
            dgm.load_ae(load_history=False)
        # dgm.set_dgm_layers_pretrained()
    if resume:
        print("Resuming training")
        dgm.load_model()

    dgm.cuda()
    # dgm.vae.generate_random(False, batch_size, z1_size, [1, 28, 28])
    dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2)
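# Note (assumption, not in the original source): Python does not call __main_gm__
# by itself, so a guard like the one below is presumably how these scripts are run
# directly. The same pattern applies to the other __main__ functions in this repo.
if __name__ == "__main__":
    __main_gm__()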
def __main__():
    train_ds = None
    valid_ds = None
    meta_df = None
    unlabelled_meta_df = None
    # Imports the flattened original relied on implicitly:
    import numpy as np
    import pandas as pd
    import torch
    from numpy import genfromtxt
    from torchvision import transforms, datasets
    from models.discriminative.artificial_neural_networks.mlp_keras import mlp_keras
    from graveyard.HebbNet import HebbNet
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.ladder_dgm import LadderDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.dgm import DeepGenerativeModel
    from models.generative.autoencoders.vae.vae import VariationalAutoencoder
    from models.generative.autoencoders.vae.ladder_vae import LadderVariationalAutoencoder
    from models.generative.autoencoders.vae.sylvester_vae import SylvesterVAE
    from utils.utils import dict_of_int_highest_elements, plot_evaluation
    from utils.files_destinations import home_path, destination_folder, data_folder, meta_destination_folder, \
        results_folder, plots_folder_path
    # GeoParser, ordination2d, adapt_datasets and the run settings (example, nets, ords,
    # geo_ids, batch_size, initial_lr, ...) are assumed to be defined at module level.

    examples_list = ["mnist", "cifar10", "cifar100"]
    if example not in examples_list and not import_local_file and import_geo:
        g = GeoParser(home_path=home_path, geo_ids=geo_ids, unlabelled_geo_ids=unlabelled_geo_ids,
                      bad_geo_ids=bad_geo_ids)
        g.get_geo(load_from_disk=load_from_disk)
        meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=True)
        unlabelled_meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=False)
        if translate == "y":  # == instead of `is` for string comparison
            for geo_id in geo_ids:
                g.translate_indices_df(geo_id, labelled=True)
            for geo_id in unlabelled_geo_ids:
                g.translate_indices_df(geo_id, labelled=False)
        meta_df, unlabelled_meta_df = adapt_datasets(meta_df, unlabelled_meta_df)
    elif import_local_file:
        train_arrays = np.load(local_folder + train_images_fname, encoding="latin1")
        train_dataset = np.vstack(train_arrays[:, 1])
        train_labels = genfromtxt(local_folder + train_labels_fname, delimiter=",", dtype=str,
                                  skip_header=True)[:, 1]
        test_dataset = np.vstack(np.load(local_folder + "test_images.npy", encoding="latin1")[:, 1])
        meta_df = pd.DataFrame(train_dataset, columns=train_labels)
        img_shape = [1, 100, 100]
    elif import_dessins:
        train_labels = genfromtxt("data/kaggle_dessins/" + train_labels_fname, delimiter=",", dtype=str,
                                  skip_header=True)[:, 1]
        data_transform = transforms.Compose([
            transforms.RandomResizedCrop(224),  # RandomSizedCrop is deprecated in torchvision
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])
        dessins_train_data = datasets.ImageFolder(root='data/kaggle_dessins/train/', transform=data_transform)
        dessins_valid_data = datasets.ImageFolder(root='data/kaggle_dessins/valid/', transform=data_transform)
        train_ds = torch.utils.data.DataLoader(dessins_train_data, batch_size=batch_size, shuffle=True,
                                               num_workers=0)
        valid_ds = torch.utils.data.DataLoader(dessins_valid_data, batch_size=batch_size, shuffle=True,
                                               num_workers=0)

    if ords in ("pca", "both", "b", "p"):
        print("PCA saved at: ", plots_folder_path)
        ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="pca",
                     images_folder_path=plots_folder_path,
                     info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))
    if ords in ("tsne", "both", "b", "t"):  # membership test instead of chained `is`
        ordination2d(g.meta_df, dataset_name, "tsne", dataset_name, plots_folder_path + "tsne/",
                     info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))

    if "mlp" in nets or "MLP" in nets or all_automatic:
        mlp = mlp_keras()
        mlp.set_configs(home_path, results_folder, data_folder, destination_folder,
                        meta_destination_folder=meta_destination_folder)
        mlp.import_dataframe(meta_df, batch_size)
        mlp.init_keras_paramters()
        mlp.set_data()
        if nvidia:
            mlp.cuda()
        for r in range(nrep):
            if nrep == 1:
                mlp.load_model()
            print(str(r) + "/" + str(nrep))
            mlp.train(batch_size=batch_size, n_epochs=n_epochs, nrep=nrep)
        if evaluate_individually:
            mlp.evaluate_individual()
            top_loss_diff = dict_of_int_highest_elements(mlp.loss_diff, 20)  # highest are the most affected
            top_index = top_loss_diff.keys()
            mlp.evaluate_individual_arrays(top_index)
            # map(str, ...) so joining integer attributes like batch_size does not raise
            file_name = plots_folder_path + "_".join(map(str, [mlp.dataset_name, mlp.init, mlp.batch_size,
                                                               mlp.nrep]))
            plot_evaluation(mlp.log_loss_diff_indiv, file_name + "_accuracy_diff.png", 20)

    if "vae" in nets or "VAE" in nets or all_automatic:
        from utils.parameters import h_dims, betas, num_elements
        is_example = True
        if ladder:
            from utils.parameters import z_dims
            vae = LadderVariationalAutoencoder(vae_flavour, z_dims=z_dims, h_dims=h_dims,
                                               n_flows=number_of_flows, num_elements=num_elements,
                                               auxiliary=False)
            z_dim_last = z_dims[-1]  # used by define_configurations below
        elif vae_flavour in ["o-sylvester", "h-sylvester", "t-sylvester"]:
            print("vae_flavour", vae_flavour)
            from utils.parameters import z_dim_last
            vae = SylvesterVAE(vae_flavour, z_dims=[z_dim_last], h_dims=h_dims, n_flows=number_of_flows,
                               num_elements=num_elements, auxiliary=False)
        else:
            print("vae_flavour", vae_flavour)
            from utils.parameters import z_dim_last
            vae = VariationalAutoencoder(vae_flavour, h_dims=h_dims, n_flows=number_of_flows, auxiliary=False)
        vae.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                        destination_folder=destination_folder, dataset_name=dataset_name, lr=initial_lr,
                        meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
        if meta_df is not None:
            vae.import_dataframe(g.meta_df, batch_size)
        elif example in examples_list:
            print("LOADING EXAMPLE")
            vae.load_example_dataset(dataset=example, batch_size=batch_size)
            is_example = True
        print("PCA saved at: ", plots_folder_path)
        meta_df = vae.train_ds
        ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="pca",
                     images_folder_path=plots_folder_path,
                     info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))
        vae.define_configurations(vae_flavour, early_stopping=1000, warmup=warmup, ladder=ladder,
                                  z_dim=z_dim_last)
        vae.set_data(is_example=is_example)
        if ladder:
            print("Setting ladder layers")
            vae.set_lvae_layers()
        else:
            vae.set_vae_layers()
        if load_vae:
            vae.load_model()
        vae.run(epochs=n_epochs)

    if "dgm" in nets or "DGM" in nets or all_automatic:
        is_example = False
        from utils.list_parameters import z_dims, h_dims, num_elements, a_dims
        if auxiliary:
            dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                               a_dim=a_dim, num_elements=num_elements, use_conv=use_conv)
        elif ladder:
            dgm = LadderDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                            auxiliary=False)
        else:
            print(vae_flavour)
            dgm = DeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=None,
                                      auxiliary=False, num_elements=num_elements)
        dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                        destination_folder=destination_folder, dataset_name=dataset_name, lr=initial_lr,
                        meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
        if meta_df is not None:
            dgm.import_dataframe(meta_df, batch_size, labelled=True)
            if has_unlabelled:
                dgm.import_dataframe(unlabelled_meta_df, batch_size, labelled=False)
            else:
                dgm.import_dataframe(meta_df, batch_size, labelled=False)
        elif example in examples_list:
            dgm.load_example_dataset(dataset=example, batch_size=batch_size, labels_per_class=400)
            is_example = True
            print("PCA saved at: ", plots_folder_path)
            # meta_df = pd.DataFrame(dgm.train_ds)
            # ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="pca",
            #              images_folder_path=plots_folder_path,
            #              info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))
        elif import_dessins:
            import os
            dgm.batch_size = batch_size
            dgm.input_shape = [1, 100, 100]
            dgm.input_size = 10000
            dgm.labels_set = os.listdir('data/kaggle_dessins/train/')
            dgm.num_classes = len(dgm.labels_set)
            dgm.make_loaders(train_ds=train_ds, valid_ds=valid_ds, test_ds=None,
                             labels_per_class=labels_per_class, unlabelled_train_ds=None,
                             unlabelled_samples=True)
        dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
        if import_local_file:
            dgm.labels = train_labels
            dgm.labels_set = list(set(train_labels))
        if not import_dessins:
            dgm.set_data(labels_per_class=labels_per_class, is_example=is_example)
        dgm.cuda()
        if auxiliary:
            if use_conv:
                dgm.set_conv_adgm_layers(is_hebb_layers=False, input_shape=img_shape)
            else:
                dgm.set_adgm_layers(h_dims=h_dims_classifier)
        elif ladder:
            dgm.set_ldgm_layers()
        else:
            if use_conv:
                dgm.set_conv_dgm_layers(input_shape=img_shape)
            else:
                dgm.set_dgm_layers()
        # Import the M1 in the M1+M2 model (Kingma et al., 2014)
        if load_vae:
            print("Importing the model: ", dgm.model_file_name)
            if use_conv:
                dgm.import_cvae()
            else:
                dgm.load_ae(load_history=False)
            # dgm.set_dgm_layers_pretrained()
        if resume:
            print("Resuming training")
            dgm.load_model()
        dgm.cuda()
        # dgm.vae.generate_random(False, batch_size, z1_size, [1, 28, 28])
        dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2)

    if "hnet" in nets or all_automatic:
        net = HebbNet()
        if meta_df is not None:
            net.import_dataframe(g.meta_df, batch_size)
        elif example in examples_list:
            net.load_example_dataset(dataset=example, batch_size=batch_size, labels_per_class=400)
            is_example = True
            images_folder_path = plots_folder_path + "pca/"
            ordination2d(net.meta_df, "pca", images_folder_path, dataset_name=dataset_name)
        else:
            print("Example not available")
            exit()
        net.set_data()
        net.init_parameters(batch_size=batch_size, input_size=net.input_size, num_classes=net.num_classes,
                            dropout=0.5, gt=[0, 0, 0, 0, 0, 0], gt_input=-10000, hyper_count=5,
                            clampmax=100, n_channels=n_channels)
        net.set_layers()
        if torch.cuda.is_available():
            net.cuda()
        net.run(n_epochs, hebb_round=1, display_rate=1)
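# Hedged, self-contained sketch (not part of the original script): before choosing
# labels_per_class for semi-supervised training on the Kaggle "dessins" data, it can
# help to inspect the class balance. Only the CSV path is taken from the code above;
# the variable names here are illustrative.
from collections import Counter
from numpy import genfromtxt

dessins_labels = genfromtxt("data/kaggle_dessins/train_labels.csv", delimiter=",",
                            dtype=str, skip_header=True)[:, 1]
counts = Counter(dessins_labels)
for cls, n in counts.most_common():
    print(f"{cls}: {n}")
print("classes:", len(counts), "min per class:", min(counts.values()))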
def __main__():
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
    from utils.files_destinations import home_path, destination_folder, data_folder, results_folder

    dataset_name = "mnist_ssl_conv"
    n_epochs = 1000
    lr = 1e-4
    h_dims = [64, 32]
    h_dims_classifier = [128]
    betas = (0.9, 0.999)
    z_dims = [20]
    a_dims = [20]
    num_elements = 2
    input_shape = [1, 28, 28]
    vae_flavour = "o-sylvester"
    number_of_flows = 4
    batch_size = 64
    warmup = 3
    load_vae = False
    resume = False
    labels_per_class = -1
    early_stopping = 100
    use_conv = True
    auxiliary = True
    mc = 1
    iw = 1
    l1 = 0.
    l2 = 0.

    dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=a_dims[0],
                                       num_elements=num_elements, use_conv=use_conv)
    dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
    dgm.load_example_dataset(dataset="mnist", batch_size=batch_size, labels_per_class=labels_per_class)
    dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
    dgm.cuda()
    dgm.set_conv_adgm_layers(is_hebb_layers=False, input_shape=input_shape)

    # Import the M1 in the M1+M2 model (Kingma et al., 2014)
    if load_vae:
        print("Importing the model: ", dgm.model_file_name)
        if use_conv:
            dgm.import_cvae()
        else:
            dgm.load_ae(load_history=False)
        # dgm.set_dgm_layers_pretrained()
    if resume:
        print("Resuming training")
        dgm.load_model()

    dgm.cuda()
    # dgm.vae.generate_random(False, batch_size, z1_size, [1, 28, 28])
    dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2)
def __main_gm__():
    train_ds = None
    valid_ds = None
    meta_df = None
    unlabelled_meta_df = None
    from torchvision import transforms, datasets
    from models.discriminative.artificial_neural_networks.hebbian_network.HebbNet import HebbNet
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.ladder_dgm import LadderDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.dgm import DeepGenerativeModel
    from files_destinations import home_path, destination_folder, data_folder, meta_destination_folder, \
        results_folder, plots_folder_path
    # GeoParser, ordination2d, adapt_datasets, torch and the run settings (example,
    # nets, ords, geo_ids, batch_size, initial_lr, ...) are assumed module-level.

    examples_list = ["mnist", "cifar10", "cifar100"]
    if example not in examples_list and not import_local_file and import_geo:
        g = GeoParser(home_path=home_path, geo_ids=geo_ids, unlabelled_geo_ids=unlabelled_geo_ids,
                      bad_geo_ids=bad_geo_ids)
        g.get_geo(load_from_disk=load_from_disk)
        meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=True)
        unlabelled_meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=False)
        if translate == "y":  # == instead of `is` for string comparison
            for geo_id in geo_ids:
                g.translate_indices_df(geo_id, labelled=True)
            for geo_id in unlabelled_geo_ids:
                g.translate_indices_df(geo_id, labelled=False)
        meta_df, unlabelled_meta_df = adapt_datasets(meta_df, unlabelled_meta_df)

    if ords in ("pca", "both", "b", "p"):
        print("PCA saved at: ", plots_folder_path)
        ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="pca",
                     images_folder_path=plots_folder_path,
                     info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))
    if ords in ("tsne", "both", "b", "t"):
        ordination2d(g.meta_df, dataset_name, "tsne", dataset_name, plots_folder_path + "tsne/",
                     info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))

    if "dgm" in nets or "DGM" in nets or all_automatic:
        is_example = False
        from list_parameters import z_dims, h_dims, num_elements, a_dims
        if auxiliary:
            dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                               a_dim=a_dim, num_elements=num_elements, use_conv=use_conv)
        elif ladder:
            dgm = LadderDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                            auxiliary=False)
        else:
            print(vae_flavour)
            dgm = DeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=None,
                                      auxiliary=False, num_elements=num_elements)
        dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                        destination_folder=destination_folder, dataset_name=dataset_name, lr=initial_lr,
                        meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
        if meta_df is not None:
            dgm.import_dataframe(meta_df, batch_size, labelled=True)
            if has_unlabelled:
                dgm.import_dataframe(unlabelled_meta_df, batch_size, labelled=False)
            else:
                dgm.import_dataframe(meta_df, batch_size, labelled=False)
        elif example in examples_list:
            dgm.load_example_dataset(dataset=example, batch_size=batch_size, labels_per_class=400)
            is_example = True
            print("PCA saved at: ", plots_folder_path)
            # meta_df = pd.DataFrame(dgm.train_ds)
            # ordination2d(meta_df, epoch="pre", dataset_name=dataset_name, ord_type="pca",
            #              images_folder_path=plots_folder_path,
            #              info=str(geo_ids) + str(unlabelled_geo_ids) + str(bad_geo_ids))
        elif import_dessins:
            import os
            dgm.batch_size = batch_size
            dgm.input_shape = [1, 100, 100]
            dgm.input_size = 10000
            dgm.labels_set = os.listdir('data/kaggle_dessins/train/')
            dgm.num_classes = len(dgm.labels_set)
            dgm.make_loaders(train_ds=train_ds, valid_ds=valid_ds, test_ds=None,
                             labels_per_class=labels_per_class, unlabelled_train_ds=None,
                             unlabelled_samples=True)
        dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
        if import_local_file:
            dgm.labels = train_labels
            dgm.labels_set = list(set(train_labels))
        if not import_dessins:
            dgm.set_data(labels_per_class=labels_per_class, is_example=is_example)
        dgm.cuda()
        if auxiliary:
            if use_conv:
                dgm.set_conv_adgm_layers(is_hebb_layers=False, input_shape=img_shape)
            else:
                dgm.set_adgm_layers(h_dims=h_dims_classifier)
        elif ladder:
            dgm.set_ldgm_layers()
        else:
            if use_conv:
                dgm.set_conv_dgm_layers(input_shape=img_shape)
            else:
                dgm.set_dgm_layers()
        # Import the M1 in the M1+M2 model (Kingma et al., 2014)
        if load_vae:
            print("Importing the model: ", dgm.model_file_name)
            if use_conv:
                dgm.import_cvae()
            else:
                dgm.load_ae(load_history=False)
            # dgm.set_dgm_layers_pretrained()
        if resume:
            print("Resuming training")
            dgm.load_model()
        dgm.cuda()
        # dgm.vae.generate_random(False, batch_size, z1_size, [1, 28, 28])
        dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2)

    if "hnet" in nets or all_automatic:
        net = HebbNet()
        if meta_df is not None:
            net.import_dataframe(g.meta_df, batch_size)
        elif example in examples_list:
            net.load_example_dataset(dataset=example, batch_size=batch_size, labels_per_class=400)
            is_example = True
            images_folder_path = plots_folder_path + "pca/"
            ordination2d(net.meta_df, "pca", images_folder_path, dataset_name=dataset_name)
        else:
            print("Example not available")
            exit()
        net.set_data()
        net.init_parameters(batch_size=batch_size, input_size=net.input_size, num_classes=net.num_classes,
                            dropout=0.5, gt=[0, 0, 0, 0, 0, 0], gt_input=-10000, hyper_count=5,
                            clampmax=100, n_channels=n_channels)
        net.set_layers()
        if torch.cuda.is_available():
            net.cuda()
        net.run(n_epochs, hebb_round=1, display_rate=1)
def __main__():
    from data_preparation.GeoParser import GeoParser
    from dimension_reduction.ordination import ordination2d
    from sklearn.decomposition import PCA
    from IPython.display import Image
    import pandas as pd
    import numpy as np
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
    from utils.utils import dict_of_int_highest_elements, plot_evaluation

    # files_destinations
    home_path = "/home/simon/"
    destination_folder = "annleukemia"
    data_folder = "data"
    results_folder = "results"
    meta_destination_folder = "pandas_meta_df"
    plots_folder_path = "/".join([home_path, destination_folder, results_folder, "plots/"])

    # dataset_name = "gse33000_and_GSE24335_GSE44768_GSE44771_GSE44770"
    dataset_name = "dessins"
    activation = "relu"
    # nrep = 3
    betas = (0.9, 0.999)
    vae_flavour = "o-sylvester"
    early_stopping = 200
    labels_per_class = 10000
    n_epochs = 1000
    warmup = 100
    gt_input = 10000
    # If ladder is set, builds a ladder VAE. Do not combine with auxiliary (yet; it might be
    # possible and not too hard to implement, but could be overkill. Might be interesting too.)
    translate = "n"

    # Types of deep generative model
    # Convolutional VAE and convolutional classifier
    use_conv_ae = False  # only applicable to sequence-like data (images, videos, sentences, DNA...)
    use_convnet = True
    # Ladder VAE (L-VAE)
    ladder = False
    # Auxiliary variational autoencoder (A-VAE)
    auxiliary = True
    # Load a pre-computed VAE (unsupervised learning)
    load_vae = False
    lr = 1e-4
    l1 = 0.
    l2 = 0.
    batch_size = 128
    mc = 1  # display seems to misbehave when mc > 1; results seem good
    iw = 1  # display seems to misbehave when iw > 1; results seem good

    # Layer sizes
    a_dim = 50
    h_dims_classifier = [256]
    h_dims = [256, 128]
    z_dims = [50]
    # Number of flows
    number_of_flows = 5
    num_elements = 2
    # File destinations
    load_from_disk = True
    load_merge = False

    dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=a_dim,
                                       num_elements=num_elements, is_hebb_layers=True, gt_input=gt_input)
    dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers",
                    is_unlabelled=True)
    dgm.load_local_dataset(root_train="/home/simon/annleukemia/data/kaggle_dessins/train",
                           root_valid="/home/simon/annleukemia/data/kaggle_dessins/valid",
                           root_test="/home/simon/annleukemia/data/kaggle_dessins/test",
                           n_classes=31, batch_size=batch_size, labels_per_class=labels_per_class,
                           extra_class=True, unlabelled_train_ds=True, normalize=False, mu=0.5, var=0.5)
    is_example = False

    # Get an ordination from this!
    train = np.vstack([x[0].data.numpy() for x in dgm.x_train])
    # unlabelled_train = np.vstack([x[0].data.numpy() for x in dgm.unlabelled_x_train])
    targets = np.vstack([x[1].data.numpy() for x in dgm.x_train])
    labels = [x.tolist().index(1) for x in targets]  # one-hot -> class index

    dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
    dgm.set_data(labels_per_class=labels_per_class, is_example=True, extra_class=True)

    planes_classifier = [1, 16, 32, 64, 128, 256, 512]
    classifier_kernels = [3, 3, 3, 3, 3, 3, 3]
    classifier_pooling_layers = [True, True, True, True, True, True, False, False]
    dgm.set_adgm_layers(h_dims=h_dims_classifier, input_shape=[1, 100, 100],
                        use_conv_classifier=use_convnet, planes_classifier=planes_classifier,
                        classifier_kernels=classifier_kernels,
                        classifier_pooling_layers=classifier_pooling_layers)
    # dgm.set_dgm_layers()

    # Import the M1 in the M1+M2 model (Kingma et al., 2014). Not sure it still works...
    if load_vae:
        print("Importing the model: ", dgm.model_file_name)
        if use_conv_ae:
            dgm.import_cvae()
        else:
            dgm.load_model()
        # dgm.set_dgm_layers_pretrained()

    dgm.cuda()
    # dgm.vae.generate_random(False, batch_size, z1_size, [1, 28, 28])
    dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2, verbose=1, show_progress=10,
            show_pca_train=10, show_lda_train=10, show_pca_generated=10, clip_grad=1e-5,
            is_input_pruning=False, start_pruning=10000, show_lda_generated=10, warmup_n=-1,
            alpha_rate=1000)
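# Hedged sketch (assumption, not in the original): the "Get an ordination from this!"
# comment above suggests a 2-D PCA of the flattened training images. This assumes it
# runs where `train`, `labels` and `plots_folder_path` are in scope (e.g. at the end
# of __main__ above); PCA comes from sklearn, which the script already imports.
import matplotlib
matplotlib.use("Agg")  # write to file; no display assumed
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

pca = PCA(n_components=2)
coords = pca.fit_transform(train.reshape(len(train), -1))  # flatten images to vectors
plt.scatter(coords[:, 0], coords[:, 1], c=labels, s=4, cmap="tab20")
plt.xlabel("PC1 ({:.1%} var)".format(pca.explained_variance_ratio_[0]))
plt.ylabel("PC2 ({:.1%} var)".format(pca.explained_variance_ratio_[1]))
plt.savefig(plots_folder_path + "pca_pre_training.png")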
def __main__():
    import os
    import torch
    from numpy import genfromtxt
    from torchvision import transforms, datasets
    # AuxiliaryDeepGenerativeModel and ConvNet are assumed to be imported at module level.

    local_folder = "./data/kaggle_dessins/"
    train_images_fname = "train_images.npy"
    train_labels_fname = "train_labels.csv"
    home_path = "/home/simon/"
    destination_folder = "annleukemia"
    data_folder = "data"
    results_folder = "results"
    extra_class = True  # TODO make it a count; curious whether more than one extra class is desirable
    meta_destination_folder = "pandas_meta_df"
    plots_folder_path = "/".join([home_path, destination_folder, results_folder, "plots/"])
    dataset_name = "dessins"
    vae_flavour = "o-sylvester"
    activation = "relu"
    early_stopping = 200
    n_epochs = 1000
    gt_input = 0
    use_conv = False  # only applicable to sequence-like data (images, videos, sentences, DNA...)
    l1 = 0.
    l2 = 0.
    dropout = 0.5
    batch_size = 64
    number_of_flows = 4
    num_elements = 3
    a_dim = 20
    lr = 1e-4
    z_dims = [50]
    is_pruning = False
    labels_per_class = -1  # assumed default; the original used this name without defining it
    # mc = 1
    # iw = 1

    # Layer sizes
    h_dims = [1024]
    planes = [1, 16, 32, 64, 128, 256, 512]
    kernels = [3, 3, 3, 3, 3, 3]
    pooling_layers = [1, 1, 1, 1, 1, 1]

    train_labels = genfromtxt("data/kaggle_dessins/" + train_labels_fname, delimiter=",", dtype=str,
                              skip_header=True)[:, 1]
    data_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),  # RandomSizedCrop is deprecated in torchvision
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    dessins_train_data = datasets.ImageFolder(root='data/kaggle_dessins/train/', transform=data_transform)
    dessins_valid_data = datasets.ImageFolder(root='data/kaggle_dessins/valid/', transform=data_transform)
    train_ds = torch.utils.data.DataLoader(dessins_train_data, batch_size=batch_size, shuffle=True,
                                           num_workers=0)
    valid_ds = torch.utils.data.DataLoader(dessins_valid_data, batch_size=batch_size, shuffle=True,
                                           num_workers=0)

    # The DGM is prepared here but only the ConvNet below is actually trained.
    dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=a_dim,
                                       num_elements=num_elements, use_conv=use_conv)
    dgm.batch_size = batch_size
    dgm.input_shape = [1, 100, 100]
    dgm.input_size = 10000
    dgm.labels_set = os.listdir('data/kaggle_dessins/train/')
    dgm.num_classes = len(dgm.labels_set)
    dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
    dgm.make_loaders(train_ds=train_ds, valid_ds=valid_ds, test_ds=None, labels_per_class=labels_per_class,
                     unlabelled_train_ds=None, unlabelled_samples=True)

    input_shape = [1, 100, 100]
    labels = train_labels
    mlp = ConvNet(input_shape=input_shape, num_classes=len(set(labels)),  # number of distinct classes
                  h_dims=h_dims, extra_class=extra_class, l1=l1, l2=l2, batch_norm=True)
    mlp.labels = labels
    mlp.labels_set = list(set(labels))
    mlp.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers",
                    is_unlabelled=False)

    train_total_loss_histories = [[] for _ in range(10)]
    train_accuracy_histories = [[] for _ in range(10)]
    valid_total_loss_histories = [[] for _ in range(10)]
    valid_accuracy_histories = [[] for _ in range(10)]
    for i in range(10):
        print("Random train/valid split", i)
        mlp.set_data(labels_per_class=-1, is_example=False, extra_class=extra_class)
        mlp.glorot_init()
        mlp.run(n_epochs, verbose=3, show_progress=10, hist_epoch=20, is_balanced_relu=False, all0=False)
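# Hedged sketch (assumption): the four *_histories lists above are declared but never
# filled in the original; presumably each mlp.run appends a per-split curve. Given
# such lists, the mean curve across the 10 random splits could be summarized so:
import numpy as np

def mean_curve(histories):
    """Average equal-length per-split curves; returns (mean, std) per epoch."""
    arr = np.asarray([h for h in histories if h], dtype=float)
    return arr.mean(axis=0), arr.std(axis=0)

# e.g. mean_acc, std_acc = mean_curve(valid_accuracy_histories)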
def __main__():
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.ladder_dgm import LadderDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.dgm import DeepGenerativeModel
    # GeoParser and adapt_datasets are assumed to be imported at module level.

    geo_ids = ["GSE33000"]
    unlabelled_geo_ids = ["GSE33000"]
    bad_geo_ids = []
    home_path = "/home/simon/"
    destination_folder = "annleukemia"
    data_folder = "data"
    results_folder = "results"
    dataset_name = "mnist_ssl"
    h_dims = [512, 256]
    h_dims_classifier = [128]
    betas = (0.9, 0.999)
    z_dims = [100]
    a_dims = [100]  # the ADGM below takes a single a_dim, as in the other scripts
    num_elements = 3
    batch_size = 64  # smaller batches were reported not to work, though they should
    number_of_flows = 10
    input_shape = [1, 35371]  # total number of microarray inputs. Each microarray may have a
    # different number; microarrays can be hard to merge if they don't overlap enough
    # (though not impossible...)
    warmup = 0
    use_conv = False
    vae_flavour = "o-sylvester"
    labels_per_class = -1
    early_stopping = 100
    has_unlabelled = False
    load_vae = False
    resume = False  # not working... TODO to be corrected, would be useful
    n_epochs = 10000
    auxiliary = True
    ladder = False
    mc = 1
    iw = 1
    l1 = 1e-6
    l2 = 1e-6
    lr = 1e-5
    load_from_disk = True
    translate = False
    load_merge = False
    num_classes = 3

    g = GeoParser(home_path=home_path, geo_ids=geo_ids, unlabelled_geo_ids=unlabelled_geo_ids,
                  bad_geo_ids=bad_geo_ids)
    g.get_geo(load_from_disk=load_from_disk, automatic_attribute_list=True)
    meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=True)
    unlabelled_meta_df = g.merge_datasets(load_from_disk=load_merge, labelled=False)
    if translate == "y":  # == instead of `is`; translate is False here, so this is skipped
        for geo_id in geo_ids:
            g.translate_indices_df(geo_id, labelled=True)
        for geo_id in unlabelled_geo_ids:
            g.translate_indices_df(geo_id, labelled=False)
    meta_df, unlabelled_meta_df = adapt_datasets(meta_df, unlabelled_meta_df)

    is_example = False
    extra_class = True
    if auxiliary:
        dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                           a_dim=a_dims[0], num_elements=num_elements, use_conv=use_conv,
                                           labels_per_class=labels_per_class)
        dgm.set_configs(num_classes=num_classes, extra_class=extra_class, home_path=home_path,
                        results_folder=results_folder, data_folder=data_folder,
                        destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                        meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
    elif ladder:
        dgm = LadderDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                        auxiliary=False, labels_per_class=labels_per_class)
        dgm.set_configs(extra_class=extra_class, home_path=home_path, results_folder=results_folder,
                        data_folder=data_folder, destination_folder=destination_folder,
                        dataset_name=dataset_name, lr=lr,
                        meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
    else:
        print(vae_flavour)
        dgm = DeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=None,
                                  auxiliary=False, num_elements=num_elements,
                                  labels_per_class=labels_per_class)
        dgm.set_configs(extra_class=extra_class, home_path=home_path, results_folder=results_folder,
                        data_folder=data_folder, destination_folder=destination_folder,
                        dataset_name=dataset_name, lr=lr,
                        meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")

    if auxiliary:
        if use_conv:
            dgm.set_conv_adgm_layers(input_shape=input_shape)
        else:
            dgm.set_adgm_layers(h_dims=h_dims_classifier, input_shape=input_shape)
    elif ladder:
        dgm.set_ldgm_layers()
    else:
        if use_conv:
            dgm.set_conv_dgm_layers(input_shape=input_shape)
        else:
            dgm.set_dgm_layers(input_shape=input_shape)

    # Import the M1 in the M1+M2 model (Kingma et al., 2014)
    if load_vae:
        print("Importing the model: ", dgm.model_file_name)
        if use_conv:
            dgm.import_cvae()
        else:
            dgm.load_ae(load_history=False)
        # dgm.set_dgm_layers_pretrained()
    if resume:
        print("Resuming training")
        dgm.load_model()

    dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
    if meta_df is not None:
        dgm.import_dataframe(meta_df, batch_size, labelled=True)
        if has_unlabelled:
            dgm.import_dataframe(unlabelled_meta_df, batch_size, labelled=False)
        else:
            dgm.import_dataframe(meta_df, batch_size, labelled=False)
    dgm.set_data(labels_per_class, ratio_training=0.8, ratio_valid=0.1, is_example=is_example,
                 is_split=True, ignore_training_inputs=0, is_custom_data=False)
    dgm.cuda()

    import os
    os.chdir(os.path.join(home_path, destination_folder))
    dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2, generate_extra_class=1000)
def __main__():
    import os
    import numpy as np
    # os.chdir("..")  # to return to the root of the project
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel

    home_path = "/home/simon/"
    destination_folder = "annleukemia"
    data_folder = "data"
    results_folder = "results"
    dataset_name = "mnist_ssl"
    h_dims = [300, 300]
    h_dims_classifier = [128]
    betas = (0.9, 0.999)
    z_dims = [10]
    a_dims = [10]
    num_elements = 2
    batch_size = 128
    number_of_flows = 10
    input_shape = [1, 28, 28]
    lr = 1e-4
    warmup = 10
    meta_df = None
    unlabelled_meta_df = None
    use_conv = False
    # vae_flavour = "o-sylvester" is set in set_vae_flavour.py
    from set_vae_flavour import vae_flavour
    labels_per_class = -1
    early_stopping = 200
    has_unlabelled = False
    load_vae = False
    resume = False  # not working... TODO to be corrected, would be useful
    n_epochs = 1000
    auxiliary = True
    classif_weight = 1.0
    repetitions = 10
    clip_grad = 1e-4
    mc = 1
    iw = 1
    l1 = 0
    l2 = 0
    num_classes = 10

    dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                       a_dim=a_dims[0], num_elements=num_elements, use_conv=use_conv)
    dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")
    if meta_df is not None:  # always False here; kept from the original template
        dgm.import_dataframe(meta_df, batch_size, labelled=True)
        if has_unlabelled:
            dgm.import_dataframe(unlabelled_meta_df, batch_size, labelled=False)
        else:
            dgm.import_dataframe(meta_df, batch_size, labelled=False)
    dgm.load_example_dataset(dataset="mnist", batch_size=batch_size, labels_per_class=labels_per_class,
                             extra_class=True)
    dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour,
                              model_name="mnist_vae", z_dim=z_dims[-1])
    dgm.cuda()
    if use_conv:
        dgm.set_conv_adgm_layers(is_hebb_layers=False, input_shape=input_shape)
    else:
        dgm.set_adgm_layers(h_dims=h_dims_classifier, input_shape=input_shape, num_classes=num_classes)

    # Import the M1 in the M1+M2 model (Kingma et al., 2014)
    if load_vae:
        print("Importing the model: ", dgm.model_file_name)
        if use_conv:
            dgm.import_cvae()
        else:
            dgm.load_ae(load_history=False)
        # dgm.set_dgm_layers_pretrained()
    if resume:
        print("Resuming training")
        dgm.load_model()

    dgm.cuda()
    log_likelihoods = dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2, t_max=classif_weight,
                              verbose=1, generate_extra_class=1000, clip_grad=clip_grad, times=repetitions)
    mean_ll = np.mean(log_likelihoods)
    print("Mean:", mean_ll)
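# Hedged follow-up (assumption, not in the original): with `times=repetitions` runs,
# a spread estimate is usually reported next to the mean. This assumes it runs inside
# __main__ above, where log_likelihoods, mean_ll and repetitions are in scope:
std_ll = np.std(log_likelihoods)
print("Mean: {:.3f} +/- {:.3f} (std over {} repetitions)".format(mean_ll, std_ll, repetitions))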
def __main__():
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.ladder_dgm import LadderDeepGenerativeModel
    from models.semi_supervised.deep_generative_models.models.dgm import DeepGenerativeModel
    from utils.files_destinations import home_path, destination_folder, data_folder, results_folder
    # auxiliary, ladder, vae_flavour, number_of_flows, use_conv, dataset_name, initial_lr,
    # batch_size, has_unlabelled, warmup, load_vae, resume, n_epochs, mc, iw, l1 and l2
    # are assumed module-level settings.

    h_dims = [64, 32]
    h_dims_classifier = [128]
    betas = (0.9, 0.999)
    z_dims = [20]
    a_dims = [20]
    num_elements = 2
    img_shape = [1, 28, 28]
    meta_df = None
    unlabelled_meta_df = None
    labels_per_class = -1
    early_stopping = 100

    if auxiliary:
        dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                           a_dim=a_dims[0], num_elements=num_elements, use_conv=use_conv)
    elif ladder:
        dgm = LadderDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                        auxiliary=False)
    else:
        print(vae_flavour)
        dgm = DeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows, a_dim=None,
                                  auxiliary=False, num_elements=num_elements)
    dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=initial_lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")

    if meta_df is not None:
        dgm.import_dataframe(meta_df, batch_size, labelled=True)
        if has_unlabelled:
            dgm.import_dataframe(unlabelled_meta_df, batch_size, labelled=False)
        else:
            dgm.import_dataframe(meta_df, batch_size, labelled=False)
    dgm.load_example_dataset(dataset="mnist", batch_size=batch_size, labels_per_class=labels_per_class)
    dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
    dgm.cuda()
    if auxiliary:
        if use_conv:
            dgm.set_conv_adgm_layers(is_hebb_layers=False, input_shape=img_shape)
        else:
            dgm.set_adgm_layers(h_dims=h_dims_classifier)
    elif ladder:
        dgm.set_ldgm_layers()
    else:
        if use_conv:
            dgm.set_conv_dgm_layers(input_shape=img_shape)
        else:
            dgm.set_dgm_layers()

    # Import the M1 in the M1+M2 model (Kingma et al., 2014)
    if load_vae:
        print("Importing the model: ", dgm.model_file_name)
        if use_conv:
            dgm.import_cvae()
        else:
            dgm.load_ae(load_history=False)
        # dgm.set_dgm_layers_pretrained()
    if resume:
        print("Resuming training")
        dgm.load_model()

    dgm.cuda()
    # dgm.vae.generate_random(False, batch_size, z1_size, [1, 28, 28])
    dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2)
def __main__():
    import os
    import numpy as np
    import torch
    from numpy import genfromtxt
    from torchvision import transforms, datasets
    from models.semi_supervised.utils.utils import onehot  # imported but unused below
    from models.semi_supervised.deep_generative_models.models.auxiliary_dgm import AuxiliaryDeepGenerativeModel

    local_folder = "./data/kaggle_dessins/"
    train_images_fname = "train_images.npy"
    train_labels_fname = "train_labels.csv"
    home_path = "/home/simon/"
    destination_folder = "annleukemia"
    data_folder = "data"
    results_folder = "results"
    dataset_name = "dessins_ssl"
    betas = (0.9, 0.999)
    z_dims = [32]
    a_dims = [50]
    h_dims = [128, 64]
    num_elements = 5
    batch_size = 32
    number_of_flows = 10
    input_shape = [1, 100, 100]
    lr = 3e-5
    warmup = 100
    meta_df = None
    unlabelled_meta_df = None
    use_conv = False
    vae_flavour = "o-sylvester"
    labels_per_class = 100  # note: the calls below pass labels_per_class=-1 instead of this value
    early_stopping = 100
    has_unlabelled = False
    load_vae = False
    resume = False  # not working... TODO to be corrected, would be useful
    n_epochs = 10000
    auxiliary = True
    mc = 1
    iw = 1
    l1 = 0.
    l2 = 0.

    dgm = AuxiliaryDeepGenerativeModel(vae_flavour, z_dims, h_dims, n_flows=number_of_flows,
                                       a_dim=a_dims[0], num_elements=num_elements, use_conv=use_conv)
    dgm.set_configs(home_path=home_path, results_folder=results_folder, data_folder=data_folder,
                    destination_folder=destination_folder, dataset_name=dataset_name, lr=lr,
                    meta_destination_folder="meta_pandas_dataframes", csv_filename="csv_loggers")

    train_labels = genfromtxt("data/kaggle_dessins/train_labels.csv", delimiter=",", dtype=str,
                              skip_header=True)[:, 1]
    train_labels_set = set(train_labels)
    num_classes = len(train_labels_set)

    data_transform = transforms.Compose([
        # transforms.RandomRotation(180),
        # transforms.RandomHorizontalFlip(),
        # transforms.RandomVerticalFlip(),
        transforms.Grayscale(),
        transforms.ToTensor(),
        # transforms.ColorJitter()
    ])
    dessins_train_data = datasets.ImageFolder(root='data/kaggle_dessins/train/', transform=data_transform)
    dessins_valid_data = datasets.ImageFolder(root='data/kaggle_dessins/valid/', transform=data_transform)
    train_ds = torch.utils.data.DataLoader(dessins_train_data, batch_size=batch_size, shuffle=True,
                                           num_workers=0)
    valid_ds = torch.utils.data.DataLoader(dessins_valid_data, batch_size=batch_size, shuffle=True,
                                           num_workers=0)

    dgm.batch_size = batch_size
    dgm.input_shape = input_shape
    dgm.input_size = np.prod(input_shape)
    dgm.make_loaders(train_ds=train_ds, valid_ds=valid_ds, test_ds=None, labels_per_class=-1,
                     unlabelled_train_ds=None, unlabelled_samples=True)
    dgm.define_configurations(early_stopping=early_stopping, warmup=warmup, flavour=vae_flavour)
    dgm.labels_set = os.listdir('data/kaggle_dessins/train/')
    dgm.num_classes = len(dgm.labels_set)
    dgm.labels = train_labels
    dgm.cuda()
    dgm.train_loader = dgm.train_loader.dataset
    dgm.valid_loader = dgm.valid_loader.dataset
    dgm.set_data(labels_per_class=-1, is_example=False, extra_class=False, has_unlabelled_samples=False,
                 is_split=False, is_custom_data=True)
    dgm.set_adgm_layers(is_hebb_layers=False, input_shape=input_shape, h_dims=h_dims)
    dgm.run(n_epochs, auxiliary, mc, iw, lambda1=l1, lambda2=l2, clip_grad=0.)
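# Hedged, generic sketch (assumption, not the repo's implementation): what the
# labels_per_class setting used throughout these scripts means -- keep n labelled
# examples per class and treat the remainder as unlabelled. Pure index arithmetic;
# no repo APIs assumed.
import numpy as np

def split_labelled(labels, n_per_class, seed=0):
    """Return (labelled_idx, unlabelled_idx) with at most n_per_class labelled per class."""
    rng = np.random.default_rng(seed)
    labels = np.asarray(labels)
    labelled_idx = []
    for cls in np.unique(labels):
        idx = np.flatnonzero(labels == cls)
        rng.shuffle(idx)
        labelled_idx.extend(idx[:n_per_class])
    labelled_idx = np.array(sorted(labelled_idx))
    unlabelled_idx = np.setdiff1d(np.arange(len(labels)), labelled_idx)
    return labelled_idx, unlabelled_idx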