def test_svm_on_plain(svm_train_dataset, svm_test_database, radii=None, binary=True):
    '''
    Train an SVM directly on the flattened raw images (no latent model) and
    report train/test accuracy plus test AUC.

    Args:
        svm_train_dataset: dataset the SVM is fitted on
        svm_test_database: dataset used for evaluation
        radii: list of radius groups; only radii[0] is used to derive the
            flattened image size. Defaults to [[8, 16, 24]].
        binary: if True, AUC is computed from the positive-class probability
            column only

    Returns:
        dict with 'svm_train_special_accuracy', 'svm_test_special_accuracy'
        and 'svm_test_special_auc'
    '''
    # BUG FIX: the original used a mutable default argument (radii=[[8, 16, 24]]);
    # use None as the sentinel instead.
    if radii is None:
        radii = [[8, 16, 24]]
    radius = min(radii[0])
    h_w = 2 * radius + 1
    # each channel is an (h_w x h_w) patch, one channel per radius in radii[0]
    im_size = h_w * h_w * len(radii[0])
    X_train, y_train = get_dataset_as_tensor(svm_train_dataset)
    latent_train = X_train.view(-1, im_size)
    SVMClassifier = svm.SVC(probability=True)
    SVMClassifier.fit(latent_train.numpy(), y_train.numpy())
    X_test, y_test = get_dataset_as_tensor(svm_test_database)
    latent_test = X_test.view(-1, im_size)
    predicted_train = SVMClassifier.predict(latent_train.numpy())
    predicted_test = SVMClassifier.predict(latent_test.numpy())
    train_accuracy = accuracy_score(y_train.numpy(), predicted_train)
    test_accuracy = accuracy_score(y_test.numpy(), predicted_test)
    predicted_test_probas = SVMClassifier.predict_proba(latent_test.numpy())
    if binary:
        # keep only the positive-class probability column for binary AUC
        predicted_test_probas = predicted_test_probas[:, 1]
    test_auc = sklearn.metrics.roc_auc_score(
        y_test.numpy().squeeze(), predicted_test_probas, multi_class='ovo')
    return {
        'svm_train_special_accuracy': train_accuracy,
        'svm_test_special_accuracy': test_accuracy,
        'svm_test_special_auc': test_auc
    }
def knn_classifier_test(model, knn_train_dataset, knn_validation_dataset, test_dataset, type_of_knn_evaluation_name):
    '''
    Fit a KNN classifier on the model's latent representation of the train set
    and report accuracies on the train, validation and test sets.

    Args:
        model: model producing the latent space; model.forward returns
            (output, latent)
        knn_train_dataset: dataset the KNN is fitted on
        knn_validation_dataset: dataset for validation accuracy
        test_dataset: dataset for test accuracy
        type_of_knn_evaluation_name: tag inserted into the returned dict keys

    Returns:
        dict with train/validation/test accuracies keyed by the evaluation name
    '''
    X_train, y_train = get_dataset_as_tensor(knn_train_dataset)
    if model.hparams.contrastive:
        X_train = X_train.cuda()
    _, latent_train = model.forward(X_train)
    # NOTE(review): n_neighbors = half the training set is unusually large
    # for KNN — presumably intentional for this use case; confirm.
    knn_classifier = KNeighborsClassifier(n_neighbors=len(knn_train_dataset) // 2)
    if model.hparams.contrastive:
        latent_train = latent_train.cpu()
        y_train = y_train.cpu()
    latent_train_numpy = latent_train.numpy()
    y_train_numpy = y_train.numpy()
    # ravel: sklearn expects a 1-D label array
    knn_classifier.fit(latent_train_numpy, y_train_numpy.ravel())
    train_accuracy = knn_accuracy_on_dataset_in_latent_space(
        knn_classifier, knn_train_dataset, model)
    validation_accuracy = knn_accuracy_on_dataset_in_latent_space(
        knn_classifier, knn_validation_dataset, model)
    # BUG FIX: test_accuracy was only assigned when the dataset was binary
    # (len(class_names) == 2) but was used unconditionally in the return,
    # raising UnboundLocalError for multi-class datasets. Compute it always.
    test_accuracy = knn_accuracy_on_dataset_in_latent_space(
        knn_classifier, test_dataset, model)
    return {f'knn_train_{type_of_knn_evaluation_name}_accuracy': train_accuracy,
            f'knn_validation_{type_of_knn_evaluation_name}_accuracy': validation_accuracy,
            f'knn_test_{type_of_knn_evaluation_name}_accuracy': test_accuracy}
def knn_accuracy_on_dataset_in_latent_space(knn_classfier, dataset: Dataset, model, use_gpu=False) -> float:
    '''
    Compute the accuracy of a fitted KNN classifier on a dataset, classifying
    in the latent space produced by `model`.

    Args:
        knn_classfier: the fitted KNN classifier used for prediction
        dataset: the dataset to classify
        model: the model which produces the latent space; when use_gpu is True
            model.forward returns the latent directly (contrastive model),
            otherwise it returns (output, latent)
        use_gpu: run the forward pass on the GPU (backward-compatible default
            added; original required the argument explicitly)

    Returns:
        the accuracy as a float (sklearn accuracy_score)
    '''
    X, y = get_dataset_as_tensor(dataset)
    if use_gpu or model.hparams.use_gpu:
        X = X.cuda()
    if use_gpu:
        # contrastive model: forward returns the latent representation directly
        latent = model.forward(X)
    else:
        _, latent = model.forward(X)
    if use_gpu or model.hparams.use_gpu:
        latent = latent.cpu()
    predicted = knn_classfier.predict(latent.numpy())
    accuracy = accuracy_score(y.numpy(), predicted)
    return accuracy
def get_accuracy_for_small_dataset(self, dataset):
    '''
    Compute classification accuracy over an entire (small) dataset in a
    single forward pass.

    Args:
        dataset: dataset small enough to be loaded as one tensor

    Returns:
        a 1-element torch.Tensor holding the accuracy
    '''
    X, y = get_dataset_as_tensor(dataset)
    outputs, _ = self.forward(X.float())
    # detach() instead of the deprecated .data attribute to drop gradients
    _, predicted = torch.max(outputs.detach(), 1)
    batch_size, channels, _, _ = X.size()
    accuracy = torch.tensor(
        [float(torch.sum(predicted == y.squeeze())) / batch_size])
    return accuracy
def svm_accuracy_on_dataset_in_latent_space(SVMClassifier, dataset: Dataset, model, predict_probas=False, multi=False, use_gpu=False) -> Tensor:
    '''
    Evaluate a fitted SVM on a dataset, classifying in the latent space of
    `model` (or on the flattened raw images when model is None).

    Args:
        SVMClassifier: the fitted svm classifier used for prediction
        dataset: the dataset to classify
        model: the model which produces the latent space; None means classify
            the flattened raw images directly
        predict_probas: if True also compute AUC from predicted probabilities
        multi: multi-class AUC (one-vs-one) instead of binary AUC
        use_gpu: run the forward pass on the GPU; when True model.forward
            returns the latent directly (contrastive model)

    Returns:
        (accuracy, auc) when predict_probas is True,
        otherwise (accuracy, f1_micro, f1_macro)
    '''
    X, y = get_dataset_as_tensor(dataset)
    if model is not None and (use_gpu or model.hparams.use_gpu):
        X = X.cuda()
    if model is not None:
        if use_gpu:
            # contrastive model: forward returns the latent directly
            latent = model.forward(X)
        else:
            _, latent = model.forward(X)
    else:
        # plain SVM on the raw images: flatten each sample
        latent = X.reshape(len(X), -1)
    if model is not None and (use_gpu or model.hparams.use_gpu):
        latent = latent.cpu()
    predicted = SVMClassifier.predict(latent.numpy())
    accuracy = accuracy_score(y.numpy(), predicted)
    if predict_probas:
        probas = SVMClassifier.predict_proba(latent.numpy())
        y_np = y.numpy().squeeze()
        if multi:
            auc = sklearn.metrics.roc_auc_score(y_np, probas, multi_class='ovo')
        else:
            # binary AUC uses only the positive-class probability column
            auc = sklearn.metrics.roc_auc_score(y_np, probas[:, 1], multi_class='ovo')
        return accuracy, auc
    f1_macro = sklearn.metrics.f1_score(y.numpy(), predicted, average='macro')
    f1_micro = sklearn.metrics.f1_score(y.numpy(), predicted, average='micro')
    accuracy = sklearn.metrics.accuracy_score(y.numpy(), predicted)
    return accuracy, f1_micro, f1_macro
def knn_classifier_test(model, knn_train_dataset, knn_validation_dataset, test_dataset, type_of_knn_evaluation_name, use_gpu=False):
    '''
    Fit a KNN classifier on the model's latent representation of the train set
    and report accuracies on the train, validation and test sets.

    Args:
        model: model producing the latent space
        knn_train_dataset: dataset the KNN is fitted on
        knn_validation_dataset: dataset for validation accuracy
        test_dataset: dataset for test accuracy
        type_of_knn_evaluation_name: tag inserted into the returned dict keys
        use_gpu: whether this model is contrastive (forward then returns the
            latent directly)

    Returns:
        dict with train/validation/test accuracies keyed by the evaluation name
    '''
    X_train, y_train = get_dataset_as_tensor(knn_train_dataset)
    # models without an hparams.use_gpu attribute are treated as CPU-only
    # (the original used a bare `except: pass`, which hid every error)
    try:
        if use_gpu or model.hparams.use_gpu:
            X_train = X_train.cuda()
    except AttributeError:
        pass
    if use_gpu:
        # means contrastive: forward returns the latent directly
        latent_train = model.forward(X_train)
    else:
        _, latent_train = model.forward(X_train)
    knn_classifier = KNeighborsClassifier()
    if use_gpu or model.hparams.use_gpu:
        latent_train = latent_train.cpu()
        y_train = y_train.cpu()
    latent_train_numpy = latent_train.numpy()
    y_train_numpy = y_train.numpy()
    knn_classifier.fit(latent_train_numpy, y_train_numpy)
    train_accuracy = knn_accuracy_on_dataset_in_latent_space(
        knn_classifier, knn_train_dataset, model, use_gpu)
    validation_accuracy = knn_accuracy_on_dataset_in_latent_space(
        knn_classifier, knn_validation_dataset, model, use_gpu)
    # BUG FIX: test_accuracy was only assigned when len(class_names) == 2 but
    # used unconditionally in the return, raising UnboundLocalError for
    # multi-class datasets. Compute it always.
    test_accuracy = knn_accuracy_on_dataset_in_latent_space(
        knn_classifier, test_dataset, model, use_gpu)
    # NOTE(review): attribute names say svm_* although this is a KNN test —
    # kept for backward compatibility with external readers of these fields.
    model.svm_validation_accuracy = validation_accuracy
    model.svm_test_accuracy = test_accuracy
    return {
        f'knn_train_{type_of_knn_evaluation_name}_accuracy': train_accuracy,
        f'knn_validation_{type_of_knn_evaluation_name}_accuracy': validation_accuracy,
        f'knn_test_{type_of_knn_evaluation_name}_accuracy': test_accuracy
    }
def set_pre_defined_datasets(self, random_dataset, typical_images_dataset, random_points_list=None):
    '''
    Prepare the self.random_dataset and self.typical_images_dataset datasets.

    Args:
        random_dataset: dataset to search in
        typical_images_dataset: dataset of the typical (query) images
        random_points_list: optional list of points matching random_dataset;
            when given it must have one entry per random image
    '''
    self.random_dataset = random_dataset
    self.typical_images_dataset = typical_images_dataset
    self.random_images_as_tensor = get_dataset_as_tensor(
        self.random_dataset)[0]
    self.random_points_list = random_points_list
    # BUG FIX: the original asserted unconditionally, so calling with the
    # default random_points_list=None raised TypeError (len(None)).
    if self.random_points_list is not None:
        assert len(self.random_points_list) == len(
            self.random_images_as_tensor)
def knn_accuracy_on_dataset_in_latent_space(knn_classfier, dataset: Dataset, model) -> torch.Tensor:
    '''
    Accuracy of a fitted KNN classifier on `dataset`, measured in the latent
    space produced by `model`.

    Args:
        knn_classfier: the fitted KNN classifier used for prediction
        dataset: the dataset to classify
        model: the model which produces the latent space

    Returns:
        the accuracy score
    '''
    X, y = get_dataset_as_tensor(dataset)
    on_gpu = model.hparams.contrastive
    if on_gpu:
        X = X.cuda()
    _, latent = model.forward(X)
    if on_gpu:
        latent = latent.cpu()
    predictions = knn_classfier.predict(latent.numpy())
    return accuracy_score(y.numpy(), predictions)
def test(self, number_to_log=5):
    '''
    log, through the self.feature_extractor logger, the test of the k nearest
    neighbours of the self.typical_images_dataset dataset

    Args:
        number_to_log: number of images to be logged (passed through unchanged
            in the return tuple)

    Returns:
        (closest images in image space, closest images in latent space,
         latents of the query images, printable query images, number_to_log,
         locations of the closest images in latent space)
    '''
    random_images_as_np = self.random_images_as_tensor.data.numpy()
    typical_images_as_tensor = get_dataset_as_tensor(
        self.typical_images_dataset)[0]
    typical_images_as_np = typical_images_as_tensor.data.numpy()
    # images in latent space
    random_images_latent_as_np = get_dataset_latent_space_as_np(
        self.feature_extractor, self.random_images_as_tensor)
    typical_images_latent_as_np = get_dataset_latent_space_as_np(
        self.feature_extractor, typical_images_as_tensor)
    typical_images_set_to_show = convert_multi_radius_tensor_to_printable(
        typical_images_as_tensor)
    # KNN search on raw pixels: both sets flattened to (num_images, features)
    closest_images_list_image_space, _ = self.knn_algo_image_space(
        random_images_as_np.reshape(random_images_as_np.shape[0], -1),
        typical_images_as_np.reshape(typical_images_as_np.shape[0], -1))
    # KNN search in latent space; 'group_from_file' averages the query group
    closest_images_list_latent_space, closest_points_list_latent_space = self.knn_algo_image_space(
        random_images_latent_as_np,
        typical_images_latent_as_np,
        take_mean=(self.method == 'group_from_file'))
    # the return is:
    # closest images (in images space) list
    # closest images (in latent space) list
    # the latent of images that where the input,
    # the "visual" way of the images that where the input
    # number of images that were asked to be logged
    # locations of the closest images (in latent space) list
    # locations of the closest images (in latent space) list
    return closest_images_list_image_space, closest_images_list_latent_space, typical_images_latent_as_np, \
        typical_images_set_to_show, number_to_log, closest_points_list_latent_space
def prepare_data(self):
    '''
    init the:
    self.random_dataset - dataset to search in
    self.typical_images_dataset - Dataset
    self.random_images_as_tensor - the dataset as tensor according to the
        self.method method.

    Raises:
        Exception: if self.method is neither 'regular' nor 'group_from_file'
    '''
    random_dataset = RandomDataset(self.random_set_size,
                                   self.original_radiis, VALIDATION_HALF,
                                   self.radii)
    size_typical_images = 5
    if self.method == 'regular':
        # query images come from a single fixed point to search around
        typical_images_dataset = ClassDataset(POINT_TO_SEARCH_SIMILAR, 1,
                                              self.original_radiis,
                                              size_typical_images, self.radii,
                                              VALIDATION_HALF, 'test_knn')
    elif self.method == 'group_from_file':
        # query images are a group of points loaded from a json file
        typical_images_dataset = ClassDataset(self.json_file_of_group, 1,
                                              self.original_radiis,
                                              size_typical_images, self.radii,
                                              VALIDATION_HALF,
                                              'test_knn_group')
    else:
        raise Exception(
            f'The knn method provided, {self.method} is not acceptable')
    self.random_dataset = random_dataset
    self.typical_images_dataset = typical_images_dataset
    # include the query images in the searched set so they can be found
    self.random_dataset = ConcatDataset(
        [self.random_dataset, self.typical_images_dataset])
    self.random_images_as_tensor = get_dataset_as_tensor(
        self.random_dataset)[0]
    self.random_points_list = None
# Evaluate every model on its validation dataset and save the metrics
# (accuracy, f1_micro, f1_macro) to a timestamped Excel file.
models = [classic_model_best, topo_resnet_full, topo_resnet_transfer, superresolution_model]
models_names = ["classic_model_best", "topo_resnet_full", "topo_resnet_transfer", "superresolution"]
val_datasets = [europe_dataset_ordinary, europe_dataset_resnet, europe_dataset_resnet,
                (europe_dataset_superresolution_train, europe_dataset_superresolution)]
auc_s = []
accuracies = []
import pandas as pd

rows = []
with torch.no_grad():
    for model, name, dataset in zip(models, models_names, val_datasets):
        if name == "superresolution":
            # the superresolution model is evaluated through an SVM on its latents
            test_res = svm_classifier_test(model, dataset[0], dataset[1], dataset[1], 'superresolution')
            accuracy = test_res['svm_validation_superresolution_accuracy']
            f1_micro = test_res['svm_validation_superresolution_f1_micro']
            f1_macro = test_res['svm_validation_superresolution_f1_macro']
        else:
            X, y = get_dataset_as_tensor(dataset)
            outputs, _ = model.forward(X)
            # explicit dim: softmax over the class dimension
            probas = nn.functional.softmax(outputs, dim=1).numpy()
            y_np = y.numpy().squeeze()
            predictions = np.argmax(probas, axis=1)
            f1_macro = sklearn.metrics.f1_score(y_np, predictions, average='macro')
            f1_micro = sklearn.metrics.f1_score(y_np, predictions, average='micro')
            accuracy = sklearn.metrics.accuracy_score(y_np, predictions)
        print(f'{name}:\t \t acc:{accuracy}\t f1_micro{f1_micro}, f1_macro{f1_macro},')
        # BUG FIX: DataFrame.append returns a NEW frame (and was removed in
        # pandas >= 2.0); the original discarded the result, so the saved
        # Excel file was always empty. Collect rows and build the frame once.
        rows.append({'name': name, 'accuracy': accuracy, 'f1_micro': f1_micro, 'f1_macro': f1_macro})
df = pd.DataFrame(rows, columns=['name', 'accuracy', 'f1_micro', 'f1_macro'])
SAVE_PATH_calc_accuracies = 'results/calc_accuracies'
Path(os.path.join(BASE_LOCATION, SAVE_PATH_calc_accuracies)).mkdir(parents=True, exist_ok=True)
# NOTE(review): the timestamp contains ':' which is invalid in Windows
# filenames — fine on POSIX systems; confirm the target platform.
df.to_excel(os.path.join(BASE_LOCATION, SAVE_PATH_calc_accuracies,
                         str(time.strftime('%Y-%m-%d %H:%M:%S')) + '.xlsx'))
def svm_classifier_test(model, svm_train_dataset, svm_validation_dataset, test_dataset, type_of_svm_evaluation_name, use_gpu=False):
    '''
    Fit an SVM on the model's latent representation of the train set (or on
    the flattened raw images when model is None) and report accuracy / f1 /
    AUC metrics.

    Args:
        model: model producing the latent space, or None for a plain SVM on
            the raw images
        svm_train_dataset: dataset the SVM is fitted on
        svm_validation_dataset: dataset for validation metrics
        test_dataset: dataset for test accuracy and AUC
        type_of_svm_evaluation_name: tag inserted into the returned dict keys
        use_gpu: whether this model is contrastive or not

    Returns:
        dict with train/validation/test accuracy, validation f1 scores and
        test AUC, keyed by the evaluation name
    '''
    X_train, y_train = get_dataset_as_tensor(svm_train_dataset)
    if model is not None and (use_gpu or model.hparams.use_gpu):
        X_train = X_train.cuda()
    if model is not None:  # if None - we mean plain svm on the images
        if use_gpu:  # if True - it is constractive
            latent_train = model.forward(X_train)
        else:
            _, latent_train = model.forward(X_train)
    else:
        latent_train = X_train.reshape(len(X_train), -1)
    SVMClassifier = svm.SVC(probability=True)
    if model is not None and (use_gpu or model.hparams.use_gpu):
        latent_train = latent_train.cpu()
        y_train = y_train.cpu()
    latent_train_numpy = latent_train.numpy()
    y_train_numpy = y_train.numpy()
    # ravel for consistency with knn_classifier_test: sklearn expects 1-D labels
    SVMClassifier.fit(latent_train_numpy, y_train_numpy.ravel())
    train_accuracy, f1_micro_train, f1_macro_train = svm_accuracy_on_dataset_in_latent_space(
        SVMClassifier, svm_train_dataset, model, use_gpu=use_gpu)
    validation_accuracy, f1_micro_validation, f1_macro_validation = svm_accuracy_on_dataset_in_latent_space(
        SVMClassifier, svm_validation_dataset, model, use_gpu=use_gpu)
    if len(test_dataset.class_names) == 2:
        # binary AUC
        test_accuracy, test_auc = svm_accuracy_on_dataset_in_latent_space(
            SVMClassifier, test_dataset, model, predict_probas=True, use_gpu=use_gpu)
    else:
        # multi-class AUC (one-vs-one)
        test_accuracy, test_auc = svm_accuracy_on_dataset_in_latent_space(
            SVMClassifier, test_dataset, model, predict_probas=True, multi=True, use_gpu=use_gpu)
    if model is not None:  # not an svm model
        model.svm_validation_accuracy = validation_accuracy
        model.svm_test_accuracy = test_accuracy
    return {
        f'svm_train_{type_of_svm_evaluation_name}_accuracy': train_accuracy,
        f'svm_test_{type_of_svm_evaluation_name}_accuracy': test_accuracy,
        f'svm_test_{type_of_svm_evaluation_name}_auc': test_auc,
        f'svm_validation_{type_of_svm_evaluation_name}_accuracy': validation_accuracy,
        f'svm_validation_{type_of_svm_evaluation_name}_f1_micro': f1_micro_validation,
        f'svm_validation_{type_of_svm_evaluation_name}_f1_macro': f1_macro_validation
    }