def autoencoder_subtask_deep_fashion():
    # train autoencoder with DeepFashion
    encoder_trained, losses = train_autoencoder_deep_fashion()
    plot_n_curves(losses, "Train losses", "Loss training autoencoder DeepFashion", axis2="Loss")

    # transfer learning with DeepFashion
    encoder_ft, train_losses, val_losses, train_acc, val_acc = fine_tune_autoencoder_deep_fashion(encoder_trained)

    # test with DeepFashion
    average_test_loss, average_test_accuracy = test_autoencoder_deep_fashion(encoder_ft)
    plot_summary([train_acc, val_acc], average_test_accuracy,
                 ["train accuracy", "val accuracy", "test accuracy"],
                 "Accuracy autoencoder DeepFashion", axis2="Accuracy")
    plot_summary([train_losses, val_losses], average_test_loss,
                 ["train loss", "val loss", "test average loss"],
                 "Loss autoencoder DeepFashion", axis2="Loss")

    return average_test_loss, average_test_accuracy
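
# plot_n_curves is a plotting helper defined elsewhere in the repo. Below is a
# minimal sketch of what it might look like, assuming matplotlib and that it
# accepts either a single curve with a single label or a list of curves with a
# list of labels (the subtasks in this file call it both ways). This is an
# illustration only, not the repo's actual implementation.
def _plot_n_curves_sketch(curves, labels, title, axis2="Loss"):
    import matplotlib.pyplot as plt
    # Normalise the single-curve call style to the list-based one
    if isinstance(labels, str):
        curves, labels = [curves], [labels]
    fig, ax = plt.subplots()
    for curve, label in zip(curves, labels):
        ax.plot(range(1, len(curve) + 1), curve, label=label)
    ax.set_xlabel("Epoch")
    ax.set_ylabel(axis2)
    ax.set_title(title)
    ax.legend()
    plt.show()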
def exemplar_cnn_subtask_deep_fashion():
    # train exemplar cnn with DeepFashion
    ex_cnn_trained, losses, accuracies = train_exemplar_cnn_deep_fashion()
    plot_n_curves([losses], ["train loss"], "Loss train ExemplarCNN DeepFashion", axis2="Loss")
    plot_n_curves([accuracies], ["train accuracy"], "Accuracy train ExemplarCNN DeepFashion", axis2="Accuracy")

    # fine tune exemplar cnn with DeepFashion
    ex_cnn_finetuned, train_losses, val_losses, train_acc, val_acc = fine_tune_exemplar_cnn_deep_fashion(ex_cnn_trained)

    # test with DeepFashion
    average_test_loss, average_test_accuracy = test_classification_on_exemplar_cnn_deep_fashion(ex_cnn_finetuned)
    plot_summary([train_acc, val_acc], average_test_accuracy,
                 ["train accuracy", "val accuracy", "test accuracy"],
                 "Accuracy Test ExemplarCNN Deep Fashion", axis2="Accuracy")
    plot_summary([train_losses, val_losses], average_test_loss,
                 ["train loss", "val loss", "test loss"],
                 "Loss Test ExemplarCNN Deep Fashion", axis2="Loss")

    return average_test_loss, average_test_accuracy
def autoencoder_subtask_fashion_mnist():
    # train autoencoder with FashionMNIST
    encoder_trained, losses = train_autoencoder_mnist()
    plot_n_curves(losses, "Train losses", "Loss train autoencoder Fashion MNIST", axis2="Loss")

    # transfer learning with FashionMNIST
    encoder_ft, train_losses, val_losses, train_acc, val_acc = fine_tune_autoencoder_mnist(encoder_trained)

    # test with FashionMNIST
    average_test_loss, average_test_accuracy = test_autoencoder_mnist(encoder_ft)
    plot_summary([train_acc, val_acc], average_test_accuracy,
                 ["train accuracy", "val accuracy", "test accuracy"],
                 "Accuracy test autoencoder FashionMNIST", axis2="Accuracy")
    plot_summary([train_losses, val_losses], average_test_loss,
                 ["train loss", "val loss", "test average loss"],
                 "Loss test autoencoder FashionMNIST", axis2="Loss")

    return average_test_loss, average_test_accuracy
def rotation_subtask_fashion_mnist():
    # train rotation net with FashionMNIST
    rotnet_trained, train_losses, val_losses, train_acc, val_acc = train_rotation_net()
    plot_n_curves([train_losses, val_losses], ["train loss", "val loss"],
                  "Loss rotation FashionMNIST", axis2="Loss")
    plot_n_curves([train_acc, val_acc], ["train accuracy", "val accuracy"],
                  "Accuracy rotation FashionMNIST", axis2="Accuracy")

    # fine tune rotation net with FashionMNIST
    rotnet_ft, train_losses_ft, val_losses_ft, train_acc_ft, val_acc_ft = fine_tune_rotation_model(rotnet_trained)

    # test with FashionMNIST
    average_test_loss, average_test_accuracy = test_classification_on_rotation_model(rotnet_ft)
    plot_summary([train_acc_ft, val_acc_ft], average_test_accuracy,
                 ["train accuracy", "val accuracy", "test accuracy"],
                 "Accuracy Test Rotation FashionMNIST", axis2="Accuracy")
    plot_summary([train_losses_ft, val_losses_ft], average_test_loss,
                 ["train loss", "val loss", "test loss"],
                 "Loss Test Rotation FashionMNIST", axis2="Loss")

    return average_test_loss, average_test_accuracy
def supervised_fashion_mnist():
    # supervised training with Fashion MNIST
    sv_trained, train_losses, val_losses, train_acc, val_acc = train_supervised_FashionMNIST()

    # test with Fashion MNIST
    average_test_loss, average_test_accuracy = test_classification_on_supervised_fashionMNIST(sv_trained)
    plot_summary([train_acc, val_acc], average_test_accuracy,
                 ["train accuracy", "val accuracy", "test accuracy"],
                 "Accuracy Test Supervised Fashion MNIST", axis2="Accuracy")
    plot_summary([train_losses, val_losses], average_test_loss,
                 ["train loss", "val loss", "test loss"],
                 "Loss Test Supervised Fashion MNIST", axis2="Loss")

    return average_test_loss, average_test_accuracy
def supervised_deep_fashion():
    # supervised training with DeepFashion
    sv_trained, train_losses, val_losses, train_acc, val_acc = train_supervised_deep_fashion()

    # test with DeepFashion
    average_test_loss, average_test_accuracy = test_classification_deep_fashion(sv_trained)
    plot_summary([train_acc, val_acc], average_test_accuracy,
                 ["train accuracy", "val accuracy", "test accuracy"],
                 "Accuracy Test Supervised Deep Fashion", axis2="Accuracy")
    plot_summary([train_losses, val_losses], average_test_loss,
                 ["train loss", "val loss", "test loss"],
                 "Loss Test Supervised Deep Fashion", axis2="Loss")

    return average_test_loss, average_test_accuracy
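
# A minimal sketch of the plot_summary variant assumed by the subtasks above: it
# plots the per-epoch train/val curves and marks the scalar test result as a
# horizontal reference line. Hypothetical illustration only; note that the
# concept-extraction code further below calls a same-named helper with a
# different, model-based signature, so these are presumably distinct helpers.
def _plot_summary_sketch(curves, test_value, labels, title, axis2="Loss"):
    import matplotlib.pyplot as plt
    fig, ax = plt.subplots()
    # The first len(curves) labels describe the curves; the last one the test scalar
    for curve, label in zip(curves, labels):
        ax.plot(range(1, len(curve) + 1), curve, label=label)
    ax.axhline(test_value, linestyle="--", label=labels[-1])
    ax.set_xlabel("Epoch")
    ax.set_ylabel(axis2)
    ax.set_title(title)
    ax.legend()
    return fig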
def main(args):
    ########################################################################################
    # Model/Data Loading
    ########################################################################################

    # Dictionary for holding numerical results of experiment
    exp_results = {}

    # Load the model, as well as input, label, and concept data
    model, x_train, y_train, x_test, y_test, c_train, c_test, c_names = get_model_data(args)
    print("Model and data loaded successfully...")

    # Evaluate network metrics
    scores = model.evaluate(x_test, y_test, verbose=0, batch_size=1000)
    print('Original Model Accuracy: {}'.format(scores[1]))

    # Save task accuracy of original model
    exp_results['model_task_acc'] = scores[1]

    # Retrieve original model output labels
    y_train_model = model.predict_classes(x_train)
    y_test_model = model.predict_classes(x_test)

    ########################################################################################
    # Concept Extraction
    ########################################################################################

    # Collect ids of model layers that have weights
    layer_ids = [i for i in range(len(model.layers)) if model.layers[i].weights != []]

    # Select ids of layers to be inspected
    start_layer = args.start_layer
    layer_ids = layer_ids[start_layer:]

    # Specify parameters for the concept extractor
    params = {
        "layer_ids": layer_ids,
        "layer_names": [model.layers[i].name for i in layer_ids],
        "batch_size": args.batch_size_extract,
        "concept_names": c_names,
        "n_concepts": len(c_names),
        "method": args.itc_model
    }

    # Split into labelled and unlabelled
    n_labelled = args.n_labelled
    n_unlabelled = args.n_unlabelled
    x_train_l, c_train_l, y_train_l, \
        x_train_u, c_train_u, y_train_u = labelled_unlabelled_split(x_train, c_train, y_train,
                                                                    n_labelled=n_labelled,
                                                                    n_unlabelled=n_unlabelled)

    print("Generating concept extractor...")

    # Select concept extractor to use and train it
    if args.itc_model == 'cme':
        conc_extractor = ItCModel(model, **params)
    else:
        params["layer_id"] = -4
        conc_extractor = Net2Vec(model, **params)

    conc_extractor.train(x_train_l, c_train_l, x_train_u)
    print("Concept extractor generated successfully...")

    # Predict test and train set concepts
    c_test_pred = conc_extractor.predict_concepts(x_data=x_test)
    c_train_pred = conc_extractor.predict_concepts(x_data=x_train)

    ########################################################################################
    # Label Predictor
    ########################################################################################

    # Specify parameters for label predictor models
    params = {"method": args.ctl_model, "concept_names": c_names}

    # Generate label predictor model
    # Trained on GROUND TRUTH concept labels and MODEL predictions
    conc_model_gt = CtLModel(c_train, y_train_model, **params)

    # Generate label predictor model
    # Trained on CONCEPT EXTRACTOR concept labels and MODEL predictions
    conc_model_extr = CtLModel(c_train_pred, y_train_model, **params)

    ########################################################################################
    # Results Generation
    ########################################################################################

    # Specify figure suffix name
    figs_path = args.figs_path
    figure_suffix = "task-{}".format(args.task_name)

    # Get per-concept accuracies
    conc_accs = [accuracy_score(c_test[:, i], c_test_pred[:, i]) * 100
                 for i in range(c_test.shape[1])]

    print("Concept Accuracies: ")
    for i in range(len(conc_accs)):
        print(c_names[i], " : ", str(conc_accs[i]))

    # Save concept accuracy results
    exp_results['concept_names'] = c_names
    exp_results['concept_accuracies'] = conc_accs

    if args.tsne_vis:
        # Get t-SNE projections
        print("Visualising t-SNE projections...")
        tsne_fig = visualise_hidden_space(x_train[:args.n_tsne_samples],
                                          c_train[:args.n_tsne_samples],
                                          conc_extractor.concept_names,
                                          conc_extractor.layer_names,
                                          conc_extractor.layer_ids,
                                          model)
        tsne_fig.show()

        # Save t-SNE plot figure
        if figs_path is not None:
            tsne_fig.savefig(os.path.join(figs_path, 'tsne-' + figure_suffix + '.png'), dpi=150)

    # Evaluate fidelity of label predictor trained on GT concepts
    y_test_pred = conc_model_gt.predict(c_test)
    score_gt = accuracy_score(y_test_model, y_test_pred) * 100
    print("Fidelity of Label Predictor trained on GT concept values: ", score_gt)

    # Evaluate fidelity and task accuracy of label predictor trained on predicted concepts
    y_test_pred = conc_model_extr.predict(c_test_pred)
    score_extr = accuracy_score(y_test_model, y_test_pred) * 100
    acc_score_extr = accuracy_score(y_test, y_test_pred) * 100
    print("Fidelity of Label Predictor trained on predicted concept values: ", score_extr)
    print("Accuracy of Label Predictor trained on predicted concept values: ", acc_score_extr)

    # Save the scores
    exp_results['ctl_gt_fidelity'] = score_gt
    exp_results['ctl_extr_fidelity'] = score_extr
    exp_results['ctl_extr_accuracy'] = acc_score_extr

    # Plot the Label Predictor model trained on extracted concepts
    ctl_fig = plot_summary(conc_model_extr)

    # Save figure
    if figs_path is not None:
        ctl_fig.savefig(os.path.join(figs_path, 'ctl-' + figure_suffix + '.png'), dpi=150)

    return exp_results
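
# CtLModel (the concepts-to-labels predictor used above and below) is defined
# elsewhere in the repo. A minimal sketch of how it could look, assuming "method"
# selects a scikit-learn classifier ("LR" for logistic regression appears below;
# the decision-tree fallback is an assumption). Illustration only, not the repo's
# actual implementation.
class _CtLModelSketch:
    def __init__(self, c_data, y_data, method="LR", concept_names=None):
        from sklearn.linear_model import LogisticRegression
        from sklearn.tree import DecisionTreeClassifier
        self.concept_names = concept_names
        # Fit the chosen classifier on (concept, label) pairs
        self.clf = LogisticRegression(max_iter=1000) if method == "LR" else DecisionTreeClassifier()
        self.clf.fit(c_data, y_data)

    def predict(self, c_data):
        # Map concept values to predicted task labels
        return self.clf.predict(c_data)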
def fidelity_experiments(args):
    # Retrieve/define necessary parameters
    saved_model_path = args.model_path
    metadata_dir = args.metadata_dir
    img_dir = args.img_dir
    use_gpu = args.use_gpu
    n_samples_per_cls = args.n_samples_per_cls
    extr_method = args.itc_model
    n_labelled = args.n_labelled
    n_unlabelled = args.n_unlabelled
    preds_save_pth = args.preds_save_pth

    # Load train and test CUB data
    model, x_train_paths, y_train_data, \
        x_test_paths, y_test_data, c_train_data, c_test_data, c_names = load_cub_data(saved_model_path,
                                                                                      metadata_dir,
                                                                                      img_dir,
                                                                                      use_gpu,
                                                                                      n_samples_per_cls)
    print("Loaded CUB data successfully...")

    # In this experiment, use only the final few layers for concept extraction
    layer_ids = [-3, -2, -1]

    # Replace concept names with numeric ids
    c_names = [str(i) for i in range(c_test_data.shape[1])]

    # Validation data, used for extracted-model training and evaluation
    x_val_paths = x_train_paths
    c_val_data = c_train_data
    y_val_data = y_train_data

    # Retrieve model output labels of original CUB model
    y_val_model = model.predict_batched(x_val_paths)
    y_test_model = model.predict_batched(x_test_paths)

    acc = accuracy_score(y_val_data, y_val_model)
    print("Validation accuracy of model: ", acc)

    # Evaluate network metrics
    acc = accuracy_score(y_test_data, y_test_model)
    print('Test accuracy: {}'.format(acc))

    # Specify model extraction parameters
    layer_names = [model.layer_names[i] for i in layer_ids]
    params = {
        "layer_ids": layer_ids,
        "layer_names": layer_names,
        "concept_names": c_names,
        "method": extr_method
    }

    # Split into labelled and unlabelled
    x_train_l_paths, c_train_l, x_train_u_paths, c_train_u = labelled_unlabbeled_split_fpaths(
        x_val_paths, c_val_data, n_labelled=n_labelled, n_unlabelled=n_unlabelled)
    print("Split into labelled/unlabelled")

    # Generate concept-extraction model
    conc_extractor = ItCModel_CUB(model, **params)
    conc_extractor.train(x_train_l_paths, c_train_l, x_train_u_paths)
    print("Concept extractor trained successfully...")

    # Predict concepts of other dataset points
    c_test_extr = conc_extractor.predict_concepts(x_test_paths)

    # Compute per-concept accuracy and F1 scores
    accuracies = [accuracy_score(c_test_data[:, i], c_test_extr[:, i]) * 100
                  for i in range(c_test_data.shape[1])]
    f1s = [f1_score(c_test_data[:, i], c_test_extr[:, i]) * 100
           for i in range(c_test_data.shape[1])]

    print("F1s: ")
    print(f1s)
    print("Avg acc.: ", str(sum(accuracies) / len(accuracies)))
    print("Avg f1.: ", str(sum(f1s) / len(f1s)))

    # ===========================================================================
    # Results Generation
    # ===========================================================================

    # Save model outputs, if flag specified
    if preds_save_pth is not None:
        y_test_data_pth = os.path.join(preds_save_pth, "y_true.npy")
        y_test_model_pth = os.path.join(preds_save_pth, "y_pred.npy")
        c_test_extr_path = os.path.join(preds_save_pth, "c_pred.npy")
        c_test_data_path = os.path.join(preds_save_pth, "c_true.npy")
        np.save(y_test_data_pth, y_test_data)
        np.save(y_test_model_pth, y_test_model)
        np.save(c_test_extr_path, c_test_extr)
        np.save(c_test_data_path, c_test_data)

    # Define dictionary containing all necessary results
    exp_results_dict = {}

    avg_acc = sum(accuracies) / len(accuracies)
    print("Average per-concept accuracy: ", avg_acc)

    # Save p-hat per-concept accuracies
    exp_results_dict["phat_c_acc"] = accuracies
    exp_results_dict["phat_c_names"] = c_names

    # Get t-SNE projections
    print("Visualising t-SNE projections...")
    n_sum_sample = 50
    tsne_fig = visualise_hidden_space(x_train_l_paths[:n_sum_sample],
                                      c_train_l[:n_sum_sample],
                                      c_names, layer_names, layer_ids, model)
    tsne_fig.show()
    # Train concept model
    # Specify model extraction parameters
    CModel_method = "LR"
    params = {"method": CModel_method, "concept_names": c_names}

    # Train q-hat on ground-truth concepts
    conc_model = CtLModel(c_val_data[:800], y_val_model[:800], **params)

    # Evaluate performance of q-hat
    y_test_extr = conc_model.predict(c_test_data)
    score = accuracy_score(y_test_model, y_test_extr) * 100
    print("Fidelity of q-hat trained on ground-truth concepts: ", score)

    # Save q-hat accuracy
    exp_results_dict["qhat_acc"] = score

    # Plot the q-hat model
    plot_summary(conc_model)

    # Concept values predicted by p-hat
    c_train_extr = conc_extractor.predict_concepts(x_val_paths)

    # q-hat trained on predicted concept values
    new_conc_model = CtLModel(c_train_extr[:800], y_val_model[:800], **params)

    # Predict x_test concepts using p-hat, and predict labels from these concepts using q-hat
    c_test_extr = conc_extractor.predict_concepts(x_test_paths)
    y_test_extr = new_conc_model.predict(c_test_extr)

    # Compute fidelity, compared to the original model's predictions
    score = accuracy_score(y_test_model, y_test_extr) * 100
    print("Fidelity of f-hat: ", score)

    # Save f-hat accuracy
    exp_results_dict["fhat_acc"] = score

    # Compute task accuracy, compared to the ground-truth labels
    acc_score = accuracy_score(y_test_data, y_test_extr) * 100
    print("Accuracy of f-hat: ", acc_score)

    # Evaluate performance of q-hat trained on p-hat-predicted values, using ground-truth concepts
    y_test_extr = new_conc_model.predict(c_test_data)
    print("Accuracy of q-hat, using ground-truth concepts: ",
          accuracy_score(y_test_data, y_test_extr) * 100)

    # Plot the f-hat model
    plot_summary(new_conc_model)

    print(exp_results_dict)

    return exp_results_dict
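
# Illustrative invocation of fidelity_experiments(). Every path and hyperparameter
# below is a placeholder assumption rather than a value taken from the repo; the
# attribute names mirror those accessed at the top of fidelity_experiments().
if __name__ == '__main__':
    from types import SimpleNamespace

    example_args = SimpleNamespace(model_path='./models/cub_model.pth',
                                   metadata_dir='./data/CUB_metadata',
                                   img_dir='./data/CUB_200_2011/images',
                                   use_gpu=True,
                                   n_samples_per_cls=15,
                                   itc_model='cme',
                                   n_labelled=100,
                                   n_unlabelled=500,
                                   preds_save_pth=None)
    fidelity_experiments(example_args)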