# Fetch a small batch of test data and run hierarchical clustering on it
print("Try to use hierarchical clustering on the embeddings")
records = 50
data, _, _ = c_nn.data_provider.get_data(
    elements_per_cluster_collection=records,
    data_type='test',
    cluster_collection_count=1,
)
x_data, _ = c_nn._build_Xy_data(data, ignore_length=True)
i_data = c_nn.data_to_cluster_indices(data)

# Only use the first cluster collection: entry 0 of every element of x_data
# except the last one, and entry 0 of the cluster indices
x_data = [network_input[0] for network_input in x_data[:-1]]
i_data = i_data[0]

# 2) Do the test; the plot file name is prefixed with the two-digit index i
mrs, homogeneity_scores, completeness_scores, thresholds = hierarchical_clustering(
    x_data,
    i_data,
    c_nn,
    plot_filename=output_dir + '/{:02d}_rand_example_hierarchical_clustering.png'.format(i),
)

# 3) Also do the test with the forward pass dropout data.
#    NOTE: this rebinds the four result names assigned in step 2.
mrs, homogeneity_scores, completeness_scores, thresholds = hierarchical_clustering(
    fd_x_data,
    fd_i_data,
    c_nn,
    plot_filename=current_output_dir + '/example_hierarchical_clustering.png',
)

# Record the output directory together with the forward pass dropout data
tests.append({
    'directory': current_output_dir,
    'fd_data': (fd_data, fd_additional_obj_info, fd_hints),
})
# 1) Generate some data (e.g. 50 records)
print("Try to use hierarchical clustering on the embeddings")
records = 50
data, _, _ = c_nn.data_provider.get_data(
    elements_per_cluster_collection=records,
    data_type='test',
    cluster_collection_count=1,
)
x_data, _ = c_nn._build_Xy_data(data, ignore_length=True)
i_data = c_nn.data_to_cluster_indices(data)

# Only use the first cluster collection: entry 0 of every element of x_data
# except the last one, and entry 0 of the cluster indices
x_data = [network_input[0] for network_input in x_data[:-1]]
i_data = i_data[0]

# If no embedding is used, the hierarchical clustering test is useless
if en is not None:

    # 2) Do the test on the freshly generated data: once without an explicit
    #    metric (the helper's default) and once with the euclidean metric
    hierarchical_clustering(
        x_data,
        i_data,
        c_nn,
        plot_filename=output_dir + '/{:02d}_rand_example_hierarchical_clustering.png'.format(i),
    )
    hierarchical_clustering(
        x_data,
        i_data,
        c_nn,
        plot_filename=output_dir + '/{:02d}_rand_example_hierarchical_clustering_euclidean.png'.format(i),
        metric='euclidean',
    )

    # 3) Also do the test with the forward pass dropout data
    #    NOTE(review): the "_cosine" file name suggests the helper's default
    #    metric is cosine -- confirm against hierarchical_clustering.
    hierarchical_clustering(
        fd_x_data,
        fd_i_data,
        c_nn,
        plot_filename=current_output_dir + '/example_hierarchical_clustering_cosine.png',
    )
    hierarchical_clustering(
        fd_x_data,
        fd_i_data,
        c_nn,
        # File name spelling fixed ("clusterin" -> "clustering") so that it
        # matches the cosine plot written just above.
        plot_filename=current_output_dir + '/example_hierarchical_clustering_euclidean.png',
        metric='euclidean',
    )
# Fetch one cluster collection of test data (`records` must already be set)
data, _, _ = c_nn.data_provider.get_data(
    elements_per_cluster_collection=records,
    data_type='test',
    cluster_collection_count=1,
)
x_data, _ = c_nn._build_Xy_data(data, ignore_length=True)
i_data = c_nn.data_to_cluster_indices(data)

# Keep entry 0 of every element of x_data except the last one, and entry 0 of
# the cluster indices
x_data = [network_input[0] for network_input in x_data[:-1]]
i_data = i_data[0]

# Measure the cluster count uncertainty with input permutation and forward
# pass dropout enabled; results go to a hard-coded local directory
x = measure_cluster_count_uncertainity(
    c_nn,
    x_data,
    show_progress=True,
    output_directory='G:/tmp/test/measure_cluster_count_uncertainity',
    input_permutation=True,
    forward_pass_dropout=True,
)

# Hierarchical Clustering
from core.nn.misc.hierarchical_clustering import hierarchical_clustering

# Generate data
records = 50
data, _, _ = c_nn.data_provider.get_data(
    elements_per_cluster_collection=records,
    data_type='test',
    cluster_collection_count=1,
)
x_data, _ = c_nn._build_Xy_data(data, ignore_length=True)
i_data = c_nn.data_to_cluster_indices(data)

# Only use the first cluster collection
x_data = [network_input[0] for network_input in x_data[:-1]]
i_data = i_data[0]

# Run the clustering test, write the plot to a hard-coded path and print the
# first returned value
mrs, homogeneity_scores, completeness_scores, thresholds = hierarchical_clustering(
    x_data,
    i_data,
    c_nn,
    plot_filename='G:/tmp/test/measure_cluster_count_uncertainity/out.png',
)
print(mrs)
pass  # no-op; presumably a debugger breakpoint anchor -- TODO confirm

# Do a dummy prediction