Exemplo n.º 1
0
def loo_crossval_naive(graph_database, wl_iter_range, param_2_range, quality_function, output_dir, base_model={}, shingles_type="features", window_size=5, accumulate_wl_shingles=True):
    '''Pick the best (wl_iterations, p) pair using exact Jaccard similarities.

    Similar to loo_crossval_sketch but computes directly the Jaccard
    similarities between the columns in the characteristic matrix,
    without using a sketch matrix. Not applicable for big datasets.

    For every value in wl_iter_range a CharacteristicMatrix is built over
    graph_database and its Jaccard similarity matrix is computed once.
    Then, for every p in param_2_range, quality_function is evaluated for
    each column index and the results are averaged into the quality of
    the model (wl_iterations, p). Each evaluated model is printed and
    appended to the file output_dir + "models_naive".

    Parameters:
        graph_database: sized collection; len(graph_database) is used as
            the number of columns of the characteristic matrix.
        wl_iter_range: iterable of wl_iterations values to try.
        param_2_range: iterable of values for the second parameter. It is
            iterated once per wl_iterations value, so it must be
            re-iterable (e.g. a list, not a generator).
        quality_function: callable invoked as
            quality_function(i, jaccard_similarity_matrix, p); its result
            must be convertible with float().
        output_dir: string prepended as-is to "models_naive" (no path
            separator is inserted).
        base_model: forwarded to model_p. When it is falsy, a final
            "Best model: ..." line is also written to the file.
        shingles_type, window_size, accumulate_wl_shingles: forwarded
            unchanged to CharacteristicMatrix.

    Returns:
        The model with the strictly greatest average quality, or the
        initial model_p(-1, -1, -1, ...) placeholder if no evaluated model
        has a greater "quality" entry.
    '''
    # NOTE(review): base_model={} is a shared mutable default. This function
    # never mutates it, but it is handed to model_p -- confirm model_p does
    # not mutate it either.
    best_model = model_p(-1, -1, -1, base_model=base_model)
    cols_count = len(graph_database)

    # The context manager guarantees the log file is closed even when the
    # matrix construction, quality_function or model_p raises mid-search.
    with open(output_dir + "models_naive", "a") as models_file:
        for wl_iterations in wl_iter_range:
            ch_matrix = CharacteristicMatrix(graph_database, cols_count, wl_iterations=wl_iterations, shingles_type=shingles_type, window_size=window_size, accumulate_wl_shingles=accumulate_wl_shingles)
            jaccard_similarity_matrix = ch_matrix.compute_jaccard_similarity_matrix()
            for p in param_2_range:
                # Float start value keeps the accumulator a float, exactly
                # like the original "avg_quality = 0." running sum.
                avg_quality = sum(
                    (float(quality_function(i, jaccard_similarity_matrix, p))
                     for i in range(cols_count)),
                    0.) / cols_count
                current_model = model_p(avg_quality, wl_iterations, p, base_model=base_model)
                # Parenthesised single-argument form prints the same text
                # under the Python 2 print statement and the Python 3
                # print function.
                print(current_model)
                models_file.write(str(current_model) + ",\n")
                # Flush after each model so the line leaves the buffer
                # immediately instead of waiting for the file to close.
                models_file.flush()
                if avg_quality > best_model["quality"]:
                    best_model = current_model

        if not base_model:
            # print best model when there are no outer parameters
            models_file.write("Best model: " + str(best_model) + "\n")

    return best_model
 def testCharacteristicMatrix_JaccardSimMatrix(self):
     # Checks that the Jaccard similarity matrix computed from the dummy
     # graph's r-ball database equals self.ch_matrix_jaccard_sim_exp
     # element by element.
     hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs, _ = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=3, r_out=2, r_all=0)
     # The node count is read after the r-ball extraction and passed as the
     # second constructor argument of the characteristic matrix.
     characteristic = CharacteristicMatrix(rballs, hypergraph.number_of_nodes(), wl_iterations=0)
     computed_sim = characteristic.compute_jaccard_similarity_matrix()
     elementwise_match = self.ch_matrix_jaccard_sim_exp == computed_sim
     self.assertTrue(elementwise_match.all(), "The computed Jaccard similarity matrix is wrong.")
 def testSimilarNodesMining(self):
     # Compares the node similarity matrix obtained through a SketchMatrix
     # with the one derived from the exact Jaccard similarities.
     hypergraph = Hypergraph(example_graphs.snm_dummy_graph)
     rballs, _ = similar_nodes_mining.extract_rballs_database(hypergraph, r_in=3, r_out=2, r_all=0)
     characteristic = CharacteristicMatrix(rballs, hypergraph.number_of_nodes(), wl_iterations=0)
     exact_jaccard = characteristic.compute_jaccard_similarity_matrix()
     # Expected result: 1.0 where the exact Jaccard similarity is at least
     # 0.8, and 0.0 everywhere else.
     expected = np.array(exact_jaccard >= 0.8, dtype=np.float32)
     sketch = SketchMatrix(25, 265, characteristic)
     actual = similar_nodes_mining.get_node_similarity_matrix(sketch)
     self.assertTrue((expected == actual).all(), "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.).")
 def testCharacteristicMatrix_JaccardSimMatrix(self):
     # Verifies that the Jaccard similarity matrix built from the dummy
     # graph's r-balls matches the expected matrix stored on the test case.
     graph = Hypergraph(example_graphs.snm_dummy_graph)
     database, _ = similar_nodes_mining.extract_rballs_database(
         graph, r_in=3, r_out=2, r_all=0)
     # Node count is taken after the extraction call and used as the
     # second positional argument of CharacteristicMatrix.
     matrix = CharacteristicMatrix(
         database, graph.number_of_nodes(), wl_iterations=0)
     actual_sim = matrix.compute_jaccard_similarity_matrix()
     comparison = self.ch_matrix_jaccard_sim_exp == actual_sim
     self.assertTrue(
         comparison.all(),
         "The computed Jaccard similarity matrix is wrong.")
Exemplo n.º 5
0
def loo_crossval_naive(graph_database,
                       wl_iter_range,
                       param_2_range,
                       quality_function,
                       output_dir,
                       base_model={},
                       shingles_type="features",
                       window_size=5,
                       accumulate_wl_shingles=True):
    '''Pick the best (wl_iterations, p) pair using exact Jaccard similarities.

    Similar to loo_crossval_sketch but computes directly the Jaccard
    similarities between the columns in the characteristic matrix,
    without using a sketch matrix. Not applicable for big datasets.

    For every value in wl_iter_range a CharacteristicMatrix is built over
    graph_database and its Jaccard similarity matrix is computed once.
    Then, for every p in param_2_range, quality_function is evaluated for
    each column index and the results are averaged into the quality of
    the model (wl_iterations, p). Each evaluated model is printed and
    appended to the file output_dir + "models_naive".

    Parameters:
        graph_database: sized collection; len(graph_database) is used as
            the number of columns of the characteristic matrix.
        wl_iter_range: iterable of wl_iterations values to try.
        param_2_range: iterable of values for the second parameter. It is
            iterated once per wl_iterations value, so it must be
            re-iterable (e.g. a list, not a generator).
        quality_function: callable invoked as
            quality_function(i, jaccard_similarity_matrix, p); its result
            must be convertible with float().
        output_dir: string prepended as-is to "models_naive" (no path
            separator is inserted).
        base_model: forwarded to model_p. When it is falsy, a final
            "Best model: ..." line is also written to the file.
        shingles_type, window_size, accumulate_wl_shingles: forwarded
            unchanged to CharacteristicMatrix.

    Returns:
        The model with the strictly greatest average quality, or the
        initial model_p(-1, -1, -1, ...) placeholder if no evaluated model
        has a greater "quality" entry.
    '''
    # NOTE(review): base_model={} is a shared mutable default. This function
    # never mutates it, but it is handed to model_p -- confirm model_p does
    # not mutate it either.
    best_model = model_p(-1, -1, -1, base_model=base_model)
    cols_count = len(graph_database)

    # The context manager guarantees the log file is closed even when the
    # matrix construction, quality_function or model_p raises mid-search.
    with open(output_dir + "models_naive", "a") as models_file:
        for wl_iterations in wl_iter_range:
            ch_matrix = CharacteristicMatrix(
                graph_database,
                cols_count,
                wl_iterations=wl_iterations,
                shingles_type=shingles_type,
                window_size=window_size,
                accumulate_wl_shingles=accumulate_wl_shingles)
            jaccard_similarity_matrix = (
                ch_matrix.compute_jaccard_similarity_matrix())
            for p in param_2_range:
                # Float start value keeps the accumulator a float, exactly
                # like the original "avg_quality = 0." running sum.
                avg_quality = sum(
                    (float(quality_function(i, jaccard_similarity_matrix, p))
                     for i in range(cols_count)),
                    0.) / cols_count
                current_model = model_p(avg_quality,
                                        wl_iterations,
                                        p,
                                        base_model=base_model)
                # Parenthesised single-argument form prints the same text
                # under the Python 2 print statement and the Python 3
                # print function.
                print(current_model)
                models_file.write(str(current_model) + ",\n")
                # Flush after each model so the line leaves the buffer
                # immediately instead of waiting for the file to close.
                models_file.flush()
                if avg_quality > best_model["quality"]:
                    best_model = current_model

        if not base_model:
            # print best model when there are no outer parameters
            models_file.write("Best model: " + str(best_model) + "\n")

    return best_model
 def testSimilarNodesMining(self):
     # Checks the sketch-based node similarity matrix against the one
     # derived from the exact Jaccard similarities of the same data.
     graph = Hypergraph(example_graphs.snm_dummy_graph)
     database, _ = similar_nodes_mining.extract_rballs_database(
         graph, r_in=3, r_out=2, r_all=0)
     matrix = CharacteristicMatrix(
         database, graph.number_of_nodes(), wl_iterations=0)
     exact_sim = matrix.compute_jaccard_similarity_matrix()
     # Expected result: 1.0 where the exact Jaccard similarity is at least
     # 0.8, and 0.0 everywhere else.
     expected = np.array(exact_sim >= 0.8, dtype=np.float32)
     sketch = SketchMatrix(25, 265, matrix)
     actual = similar_nodes_mining.get_node_similarity_matrix(sketch)
     matches = (expected == actual).all()
     self.assertTrue(
         matches,
         "The computed similarity matrix is wrong (Keep in mind that the sketch_matrix is probabilistic, therefore, it may not be always correct. The test may pass in another run.)."
     )