def test_normalize_kernel_matrix(self):
    """Smoke-test ``normalize_kernel_matrix`` on a small 3x2 integer array.

    The result is not compared against an expected value, so this test can
    only fail if the call itself raises.
    """
    sample_rows = [
        [1, 2],
        [2, 3],
        [1, 3],
    ]
    sample = np.array(sample_rows)
    # NOTE(review): the returned matrix is never checked -- consider asserting
    # on it once the expected normalisation is pinned down.
    normalized_matrix = normalize_kernel_matrix(sample)
def optimize_projections(
    *,
    output: str,
    repr_similarity_matrix,
    full_similarity_matrix,
    n_components: int,
    similarity_type: str,
    use_gpu: bool,
) -> None:
    """
    Compute and store KPCA artefacts for every kernel/hyper-parameter pair.

    The pairs are taken from the ``kernels`` mapping (kernel name -> iterable
    of hyper-parameters) together with the ``KERNEL_TO_PROJECTION`` lookup;
    both are defined outside this function. For each pair a sub-folder
    ``<output>/<kernel_name>_<hyperparam>`` is created and receives three
    files: ``alphas.p`` and ``lambdas.p`` (pickles) and ``kpca.npy``.

    :param output: The output folder
    :param repr_similarity_matrix: A square matrix with dimensions |repr| x |repr|
    :param full_similarity_matrix: A rectangular matrix with dimensions |full| x |repr|
    :param n_components: Number of eigenvector/eigenvalue pairs to keep,
        taken from the end of the ``eigh`` output
    :param similarity_type: ``"ngram_intersec"`` selects
        ``project_full_vocab_linear``; any other value uses a kernel-aware
        projection
    :param use_gpu: For kernel-aware projections, use ``project_words_gpu``
        instead of ``project_similarity_matrix``
    :return: None; all results are written to disk
    """
    for kernel_name, hyperparams in kernels.items():
        for hyperparam in hyperparams:
            # Resolved once per pair, so an unknown kernel name only fails
            # when it actually has a hyper-parameter to process.
            project_with_kernel = KERNEL_TO_PROJECTION[kernel_name]

            # Dedicated output folder for this kernel/hyper-parameter pair
            pair_dir = os.path.join(output, f'{kernel_name}_{hyperparam}')
            os.makedirs(pair_dir, exist_ok=True)

            secho(
                f"({kernel_name}/{hyperparam}) calculating normalized/symmetric kernel matrix"
            )
            kernel_matrix = project_with_kernel(repr_similarity_matrix, hyperparam)
            normalized_kernel = normalize_kernel_matrix(kernel_matrix)

            secho(
                f"({kernel_name}/{hyperparam}) solving eigenvector/eigenvalues problem"
            )
            eigenvalues, eigenvectors = eigh(normalized_kernel)

            # Negative indices walk backwards from the end of the eigh output.
            # Assuming eigh returns eigenvalues in ascending order (as the
            # numpy/scipy implementations do), rank 1 is the largest one.
            ranks = range(1, n_components + 1)

            # Alphas: the selected eigenvectors, one per column
            repr_alphas = np.column_stack(
                [eigenvectors[:, -rank] for rank in ranks]
            )
            alphas_file = os.path.join(pair_dir, "alphas.p")
            secho(
                f"({kernel_name}/{hyperparam}) outputting alphas to {alphas_file}"
            )
            with open(alphas_file, "wb") as handle:
                pickle.dump(repr_alphas, handle)

            # Lambdas: the matching eigenvalues, kept as a plain list
            repr_lambdas = [eigenvalues[-rank] for rank in ranks]
            lambdas_file = os.path.join(pair_dir, "lambdas.p")
            secho(
                f"({kernel_name}/{hyperparam}) outputting lambdas to {lambdas_file}"
            )
            with open(lambdas_file, "wb") as handle:
                pickle.dump(repr_lambdas, handle)

            secho(
                f"({kernel_name}/{hyperparam}) projecting known vocabulary to KPCA embeddings"
            )
            # Each alpha column is divided by its own lambda (broadcast over rows)
            repr_projection_matrix = repr_alphas / repr_lambdas

            if similarity_type == "ngram_intersec":
                # There is no additional kernel function on top of the
                # similarity function
                kpca_matrix = project_full_vocab_linear(
                    projection_matrix=repr_projection_matrix,
                    similarity_matrix=full_similarity_matrix,
                )
            else:
                # Both kernel-aware projectors share one keyword signature
                project = project_words_gpu if use_gpu else project_similarity_matrix
                kpca_matrix = project(
                    projection_matrix=repr_projection_matrix,
                    similarity_matrix=full_similarity_matrix,
                    kernel_name=kernel_name,
                    hyperparam=hyperparam,
                )

            # Persist the KPCA matrix
            kpca_file = os.path.join(pair_dir, "kpca.npy")
            secho(
                f"({kernel_name}/{hyperparam}) outputting KPCA matrix to {kpca_file}"
            )
            np.save(kpca_file, kpca_matrix)