# Shared imports for the evaluation snippets below; `evaluation`,
# `similarity` and the `predict_batchwise*` helpers are repo-local modules
# assumed to be importable alongside these functions.
import datetime
import logging
import time

import numpy as np
import torch


def evaluate(model, dataloader, nb_classes):
    model_is_training = model.training
    model.eval()

    # calculate embeddings with model, also get labels (non-batch-wise)
    X, T = predict_batchwise(model, dataloader)

    # calculate NMI with kmeans clustering
    nmi = evaluation.calc_normalized_mutual_information(
        T, evaluation.cluster_by_kmeans(X, nb_classes))
    logging.info("NMI: {:.3f}".format(nmi * 100))

    # get predictions by assigning the 8 nearest neighbors (Euclidean distance)
    Y = evaluation.assign_by_euclidian_at_k(X, T, 8)

    # calculate recall @ 1, 2, 4, 8
    recall = []
    for k in [1, 2, 4, 8]:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    model.train(model_is_training)  # revert to previous training state
    return nmi, recall
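# All of these evaluate variants rely on a repo-local `predict_batchwise`
# helper that is not shown here. A minimal sketch, assuming the dataloader
# yields (images, labels) batches and the model maps a batch to embeddings;
# the `_sketch` suffix marks it as an illustration, not the original helper.
def predict_batchwise_sketch(model, dataloader):
    """Collect the embedding matrix X and label vector T for a dataloader."""
    xs, ts = [], []
    with torch.no_grad():
        for images, labels in dataloader:
            xs.append(model(images).cpu())
            ts.append(labels.cpu())
    return torch.cat(xs).numpy(), torch.cat(ts).numpy()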
def evaluate(model, dataloader, nb_classes, net_type='bn_inception',
             dataroot='CARS'):
    model_is_training = model.training
    model.eval()

    # calculate embeddings with model, also get labels (non-batch-wise)
    X, T = predict_batchwise(model, dataloader, net_type)

    if dataroot != 'Stanford':
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T, evaluation.cluster_by_kmeans(X, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))
    else:
        nmi = -1

    recall = []
    if dataroot != 'Stanford':
        Y = evaluation.assign_by_euclidian_at_k(X, T, 8)
        which_nearest_neighbors = [1, 2, 4, 8]
    else:
        Y = evaluation.assign_by_euclidian_at_k(X, T, 1000)
        which_nearest_neighbors = [1, 10, 100, 1000]

    for k in which_nearest_neighbors:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    model.train(model_is_training)  # revert to previous training state
    return nmi, recall
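# Hedged sketch of the Recall@k metric logged above: the fraction of queries
# whose k nearest neighbors contain at least one sample of the same class.
# `T` holds the true labels and `Y` the per-query neighbor labels produced by
# `assign_by_euclidian_at_k`; the repo's `evaluation.calc_recall_at_k` may
# differ in details.
def calc_recall_at_k_sketch(T, Y, k):
    hits = sum(1 for t, y in zip(T, Y) if t in y[:k])
    return hits / len(T)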
def evaluate(model, dataloader, with_nmi=True):
    nb_classes = dataloader.dataset.nb_classes()

    # calculate embeddings with model and get targets
    X, T, *_ = predict_batchwise(model, dataloader)

    if with_nmi:
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T, evaluation.cluster_by_kmeans(X, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))

    # get predictions by assigning the 8 nearest neighbors (Euclidean distance)
    Y = evaluation.assign_by_euclidian_at_k(X, T, 8)
    Y = torch.from_numpy(Y)

    # calculate recall @ 1, 2, 4, 8
    recall = []
    for k in [1, 2, 4, 8]:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    if with_nmi:
        return recall, nmi
    else:
        return recall
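# Sketch of the assumed behaviour of `assign_by_euclidian_at_k`: for each of
# the N embeddings in X, return the labels of its k nearest neighbors by
# Euclidean distance, excluding the sample itself. Uses scikit-learn for
# illustration; the repo-local implementation may differ.
def assign_by_euclidian_at_k_sketch(X, T, k):
    from sklearn.metrics import pairwise_distances
    D = pairwise_distances(X, metric="euclidean")
    # column 0 of the argsort is each sample itself (distance 0), so skip it
    neighbor_idx = np.argsort(D, axis=1)[:, 1:k + 1]
    return np.array([[T[i] for i in row] for row in neighbor_idx])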
def evaluate(model, dataloader=None, fc7=None, batch=None, calc_nmi=False):
    nb_classes = model.nb_classes
    model_is_training = model.training
    model.eval()

    # calculate embeddings with model, also get labels
    emb, labels = predict_batchwise(model, dataloader=dataloader,
                                    fc7=fc7, batch=batch)

    nmi = None
    if dataloader is not None and calc_nmi:
        nmi = evaluation.calc_normalized_mutual_information(
            labels, evaluation.cluster_by_kmeans(emb, nb_classes))

    recall = []
    # rank the nearest neighbors for each input
    k_pred_labels = evaluation.assign_by_euclidian_at_k(emb, labels, 1000)
    if batch is None:
        which_nearest_neighbors = [1, 10, 100, 1000]
    else:
        which_nearest_neighbors = [1]

    for k in which_nearest_neighbors:
        r_at_k = evaluation.calc_recall_at_k(labels, k_pred_labels, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    if model_is_training:
        model.train()  # revert to previous training state
    return recall, nmi
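# Sketch of the clustering metric used by the functions above: run k-means on
# the embeddings and score the cluster assignment against the true labels
# with normalized mutual information. Assumes scikit-learn; the repo-local
# `evaluation` module may implement both steps differently.
def nmi_by_kmeans_sketch(X, T, nb_classes):
    from sklearn.cluster import KMeans
    from sklearn.metrics import normalized_mutual_info_score
    cluster_ids = KMeans(n_clusters=nb_classes).fit(X).labels_
    return normalized_mutual_info_score(T, cluster_ids)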
# `backend` here is the Keras backend module (e.g. `from keras import backend`).
def on_epoch_end(self, epoch, logs=None):
    if epoch % self.interval == 0:
        get_intermediate_layer_output = backend.function(
            [self.model.input],
            [self.model.get_layer("predictions").output])

        y_given = []
        y_embedding = []
        nb_classes = 0

        print("Before getting validation samples:")
        print(datetime.datetime.now().time())
        for i in range(128):
            X_val_temp, y_val_temp = self.validation_data.next()
            nb_classes = y_val_temp.shape[1]
            y_given.append(y_val_temp)
            y_embedding_temp = get_intermediate_layer_output([X_val_temp])[0]
            y_embedding.append(y_embedding_temp)
        print("After getting validation samples:")
        print(datetime.datetime.now().time())

        y_embedding = np.concatenate(y_embedding, axis=0)
        y_given = np.concatenate(y_given, axis=0)

        # recover integer class ids from the one-hot targets
        y_given_class_order = np.argsort(y_given, axis=-1)
        y_given_class = np.transpose(y_given_class_order)[-1]

        nmi = evaluation.calc_normalized_mutual_information(
            y_given_class,
            evaluation.cluster_by_kmeans(y_embedding, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))

        # get predictions by assigning the 8 nearest neighbors (Euclidean distance)
        Y = evaluation.assign_by_euclidian_at_k(y_embedding, y_given_class, 8)

        # calculate recall @ 1, 2, 4, 8
        recall = []
        for k in [1, 2, 4, 8]:
            r_at_k = evaluation.calc_recall_at_k(y_given_class, Y, k)
            recall.append(r_at_k)
            logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))
        return nmi, recall
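# The method above is meant to live on a Keras callback. A minimal enclosing
# skeleton, inferring `validation_data` and `interval` from the attribute
# accesses above; the class and argument names are placeholders, not the
# original code.
from tensorflow.keras.callbacks import Callback

class NMIRecallCallbackSketch(Callback):
    def __init__(self, validation_data, interval=1):
        super().__init__()
        self.validation_data = validation_data
        self.interval = interval

    # on_epoch_end as defined above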
def evaluate_inshop(model, dl_query, dl_gallery,
                    K=[1, 10, 20, 30, 40, 50], with_nmi=False):
    # calculate embeddings with model and get targets
    X_query, T_query, *_ = predict_batchwise_inshop(model, dl_query)
    X_gallery, T_gallery, *_ = predict_batchwise_inshop(model, dl_gallery)

    nb_classes = dl_query.dataset.nb_classes()
    assert nb_classes == len(set(T_query))

    # calculate the full distance matrix, then keep only the first
    # `len(X_query)` rows (queries) and the last columns (gallery)
    T_eval = torch.cat(
        [torch.from_numpy(T_query), torch.from_numpy(T_gallery)])
    X_eval = torch.cat(
        [torch.from_numpy(X_query), torch.from_numpy(X_gallery)])
    D = similarity.pairwise_distance(X_eval)[:len(X_query), len(X_query):]

    # get top k labels with smallest (`largest = False`) distance
    Y = T_gallery[D.topk(k=max(K), dim=1, largest=False)[1]]

    recall = []
    for k in K:
        r_at_k = evaluation.calc_recall_at_k(T_query, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    if with_nmi:
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T_eval.numpy(),
            evaluation.cluster_by_kmeans(X_eval.numpy(), nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))
    else:
        nmi = 1  # sentinel: NMI was not computed

    return nmi, recall
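# Sketch of the assumed behaviour of `similarity.pairwise_distance`: the full
# (N, N) matrix of Euclidean distances between all rows of X. The repo-local
# version may return squared distances instead, which leaves the neighbor
# ranking used above unchanged.
def pairwise_distance_sketch(X):
    return torch.cdist(X, X)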
def evaluate(model, dataloader, eval_nmi=True, recall_list=[1, 2, 4, 8]):
    eval_time = time.time()
    nb_classes = dataloader.dataset.nb_classes()

    # calculate embeddings with model and get targets
    X, T, *_ = predict_batchwise(model, dataloader)
    print('done collecting prediction')

    if eval_nmi:
        # calculate NMI with kmeans clustering
        nmi = evaluation.calc_normalized_mutual_information(
            T, evaluation.cluster_by_kmeans(X, nb_classes))
        logging.info("NMI: {:.3f}".format(nmi * 100))
    else:
        nmi = 1  # sentinel so the harmonic mean below stays defined

    # get predictions by assigning the `max(recall_list)` nearest neighbors
    # (Euclidean distance)
    max_k = max(recall_list)
    Y = evaluation.assign_by_euclidian_at_k(X, T, max_k)
    Y = torch.from_numpy(Y)

    # calculate recall at each k in `recall_list`
    recall = []
    for k in recall_list:
        r_at_k = evaluation.calc_recall_at_k(T, Y, k)
        recall.append(r_at_k)
        logging.info("R@{} : {:.3f}".format(k, 100 * r_at_k))

    # harmonic mean of NMI and R@1 (assumes recall_list starts at 1)
    chmean = (2 * nmi * recall[0]) / (nmi + recall[0])
    logging.info("hmean: %s", str(chmean))

    eval_time = time.time() - eval_time
    logging.info('Eval time: %.2f' % eval_time)
    return nmi, recall
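# `chmean` above is the harmonic mean of NMI and R@1: a single model-selection
# scalar that is only high when both metrics are high. Worked example:
# harmonic_mean(0.6, 0.8) = 2 * 0.6 * 0.8 / (0.6 + 0.8) ≈ 0.686.
def harmonic_mean(a, b):
    return 2 * a * b / (a + b)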
from random import randint
from typing import Any, Dict

import plotly.figure_factory as ff
import plotly.graph_objects as go
import wandb
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix

# `assign_by_euclidian_at_k`, `calc_recall_at_k`,
# `calc_normalized_mutual_information`, `cluster_by_kmeans` and
# `colors_by_name` are repo-local helpers assumed to be in scope.


def validation_epoch_end(self, outputs: Dict[str, Any]) -> None:
    """Compute metrics on the full validation set.

    Args:
        outputs (Dict[str, Any]): values collected for each batch run
            through the model in eval mode.
    """
    val_Xs = torch.cat([h["Xs"] for h in outputs])
    val_Ts = torch.cat([h["Ts"] for h in outputs])
    val_indexes = torch.cat([h["index"] for h in outputs])

    Y = assign_by_euclidian_at_k(val_Xs.cpu(), val_Ts.cpu(), 8)
    Y = torch.from_numpy(Y)

    # Return early when PL is running the sanity check.
    if self.trainer.running_sanity_check:
        return

    # Compute and log R@k
    recall = []
    logs = {}
    for k in [1, 2, 4, 8]:
        r_at_k = 100 * calc_recall_at_k(val_Ts.cpu(), Y, k)
        recall.append(r_at_k)
        logs[f"val_R@{k}"] = r_at_k
    self.log_dict(logs)

    # Compute and log NMI
    nmi = 100 * calc_normalized_mutual_information(
        val_Ts.cpu(),
        cluster_by_kmeans(val_Xs.cpu(), self.hparams.num_classes))
    self.log_dict({"NMI": nmi})

    # Inspect the embedding space in 2 and 3 dimensions.
    if 2 in self.hparams.vis_dim:
        pca = PCA(2)
        projected = pca.fit_transform(val_Xs.cpu())
        proxies = pca.transform(self.proxies.detach().cpu())

        fig_embedded_data = go.Figure()
        for cls_idx, cls_name in enumerate(self.val_dataset.classes):
            x_s = [
                o for i, o in enumerate(projected[:, 0])
                if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
            ]
            y_s = [
                o for i, o in enumerate(projected[:, 1])
                if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
            ]
            marker_color = colors_by_name[cls_idx % len(colors_by_name)]
            fig_embedded_data.add_scatter(
                x=x_s, y=y_s, marker_color=marker_color,
                text=cls_name, name=cls_name, mode="markers")
        wandb.log({"Embedding of Validation Dataset 2D": fig_embedded_data})

        fig_embedded_proxies = go.Figure()
        for cls_name, x_y in zip(self.val_dataset.classes, proxies):
            marker_color = colors_by_name[
                self.val_dataset.classes.index(cls_name) % len(colors_by_name)]
            fig_embedded_proxies.add_scatter(
                x=[x_y[0]], y=[x_y[1]], marker_color=marker_color,
                text=cls_name, name=cls_name, mode="markers")
        wandb.log(
            {"Embedding of Proxies (on validation data) 2D": fig_embedded_proxies})

    if 3 in self.hparams.vis_dim:
        pca = PCA(3)
        projected = pca.fit_transform(val_Xs.cpu())
        proxies = pca.transform(self.proxies.detach().cpu())

        fig_embedded_data = go.Figure()
        for cls_idx, cls_name in enumerate(self.val_dataset.classes):
            x_s = [
                o for i, o in enumerate(projected[:, 0])
                if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
            ]
            y_s = [
                o for i, o in enumerate(projected[:, 1])
                if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
            ]
            z_s = [
                o for i, o in enumerate(projected[:, 2])
                if self.val_dataset.get_label_description(Y[i, 0]) == cls_name
            ]
            marker_color = colors_by_name[cls_idx % len(colors_by_name)]
            fig_embedded_data.add_scatter3d(
                x=x_s, y=y_s, z=z_s, marker_color=marker_color,
                text=cls_name, name=cls_name, mode="markers")
        wandb.log({"Embedding of Validation Dataset 3D": fig_embedded_data})

        fig_embedded_proxies = go.Figure()
        for cls_name, x_y_z in zip(self.val_dataset.classes, proxies):
            marker_color = colors_by_name[
                self.val_dataset.classes.index(cls_name) % len(colors_by_name)]
            fig_embedded_proxies.add_scatter3d(
                x=[x_y_z[0]], y=[x_y_z[1]], z=[x_y_z[2]],
                marker_color=marker_color,
                text=cls_name, name=cls_name, mode="markers")
        wandb.log(
            {"Embedding of Proxies (on validation data) 3D": fig_embedded_proxies})

    cm = confusion_matrix(
        y_true=val_Ts.cpu().numpy(),
        y_pred=Y[:, 0].cpu().numpy(),
        labels=list(range(len(self.val_dataset.classes))),
    )
    fig_cm = ff.create_annotated_heatmap(
        cm,
        x=self.val_dataset.classes,
        y=self.val_dataset.classes,
        annotation_text=cm.astype(str),
        colorscale="Viridis",
    )
    wandb.log({"Confusion Matrix": fig_cm})

    # Log a query and its top 4 retrievals
    image_dict = {}
    top_k_indices = torch.cdist(val_Xs, val_Xs).topk(5, largest=False).indices
    max_idx = len(top_k_indices) - 1
    for i, example_result in enumerate(
            top_k_indices[[randint(0, max_idx) for _ in range(0, 5)]]):
        image_dict[f"global step {self.global_step} example: {i}"] = [
            wandb.Image(
                Image.open(
                    self.val_dataset.im_paths[val_indexes[example_result[0]]]),
                caption=f"query: {self.val_dataset.get_label_description(self.val_dataset.get_label(val_indexes[example_result[0]]))}",
            )
        ]
        image_dict[f"global step {self.global_step} example: {i}"].extend([
            wandb.Image(
                Image.open(self.val_dataset.im_paths[val_indexes[idx]]),
                caption=f"retrieval ({rank}): {self.val_dataset.get_label_description(self.val_dataset.get_label(val_indexes[idx]))}",
            )
            for rank, idx in enumerate(example_result[1:])
        ])
    self.logger.experiment.log(image_dict)

    # Since validation set samples are iid, I prefer looking at a histogram
    # of per-batch validation losses.
    wandb.log({"val_loss_hist": wandb.Histogram([h["val_loss"] for h in outputs])})