def test_code_decode_with_zeros_columns(self):
    """Round-trip a random sparse matrix that has two all-zero columns.

    The matrix is converted to its csc vectors, each vector is encoded
    with its own dictionary and turned into integer words, and the result
    of ``sparsed_encoded_to_dense`` is compared element-wise with the
    original matrix.
    """
    rows, cols = 50, 10
    prune_prob = 0.7  # probability that an entry is "pruned" (zeroed)
    keep = np.random.choice(a=[False, True],
                            size=(rows, cols),
                            p=[prune_prob, 1 - prune_prob])
    matr = np.random.randint(100, size=(rows, cols)) * (1 * keep)
    # Force two whole columns to zero: this is the case under test.
    for empty_col in (3, 6):
        matr[:, empty_col] = 0

    data, row_index, cum = sparse_huffman.convert_dense_to_csc(matr)

    dicts = sparse_huffman.huffman_sparse_encoded_dict(data, row_index, cum)
    (d_data, d_rev_data, d_row_index, d_rev_row_index, d_cum,
     d_rev_cum) = dicts

    encoded = sparse_huffman.encoded_matrix(
        data, d_data, d_rev_data,
        row_index, d_row_index, d_rev_row_index,
        cum, d_cum, d_rev_cum)
    data_encoded, row_index_encoded, cum_encoded = encoded

    # Same conversion for the three encoded vectors, in the order
    # data, row_index, cum.
    int_data, int_row_index, int_cum = [
        huffman.convert_bin_to_int(
            huffman.make_words_list_to_int(bits, self.bit_words_machine))
        for bits in (data_encoded, row_index_encoded, cum_encoded)
    ]

    expected_c = len(cum)
    dense = sparse_huffman.sparsed_encoded_to_dense(
        matr.shape, int_data, int_row_index, int_cum,
        d_rev_data, d_rev_row_index, d_rev_cum,
        self.bit_words_machine, expected_c)

    self.assertTrue(np.all(dense == matr))
def do_all_for_me(matr, bit_words_machine):
    """
    Run every step needed to compress a matrix when only the data vector
    of its csc representation is encoded.

    Args:
        matr: matrix to be compressed
        bit_words_machine: number of bits in a machine word
    Returns:
        A 7-tuple, in this order:
        matr_shape: shape of the matrix that was compressed
        int_data: list of integers representing the huffman encoding
         of the data vector of the csc representation
        d_rev_data: dict encoded --> element
        row_index: vector of the row indices of the csc representation
        cum: vector of the number of elements of each column
        expected_c: number of columns in the matrix (``len(cum)``)
        min_length_encoded: minimum length of the huffman encodings
    """
    data, row_index, cum = sparse_huffman.convert_dense_to_csc(matr)
    d_data, d_rev_data = huffman_sparse_encoded_dict(data)

    # Encode the data vector, split it into machine words, then convert
    # those words to integers.
    bit_words = huffman.make_words_list_to_int(
        encoded_matrix(data, d_data, d_rev_data), bit_words_machine)
    int_data = huffman.convert_bin_to_int(bit_words)

    return (
        matr.shape,
        int_data,
        d_rev_data,
        row_index,
        cum,
        len(cum),
        huffman.min_len_string_encoded(d_rev_data),
    )
    def __init__(self, *args, **kwargs):
        """Create the random fixtures stored on the test instance.

        Sets ``input_x``, a sparse ``matr``, its csc vectors (``data``,
        ``row_index``, ``cum``), the data dictionaries, the integer-encoded
        data, ``expected_c`` and ``min_length_encoded``.
        """
        super(SparseHuffmanOnlyDataTest, self).__init__(*args, **kwargs)

        rows, cols = 500, 100
        self.input_x = np.random.randint(1000, size=(70, rows))

        prune_prob = 0.7  # probability that an entry is "pruned" (zeroed)
        keep = np.random.choice(a=[False, True],
                                size=(rows, cols),
                                p=[prune_prob, 1 - prune_prob])
        self.matr = np.random.randint(500, size=(rows, cols)) * (1 * keep)

        csc_vectors = sparse_huffman.convert_dense_to_csc(self.matr)
        self.data, self.row_index, self.cum = csc_vectors

        data_dicts = sparse_huffman_only_data.huffman_sparse_encoded_dict(
            self.data)
        self.d_data, self.d_rev_data = data_dicts

        encoded_bits = sparse_huffman_only_data.encoded_matrix(
            self.data, self.d_data, self.d_rev_data)

        self.bit_words_machine = 64
        bit_words = huffman.make_words_list_to_int(encoded_bits,
                                                   self.bit_words_machine)
        self.int_data = huffman.convert_bin_to_int(bit_words)

        self.expected_c = len(self.cum)

        self.min_length_encoded = huffman.min_len_string_encoded(
            self.d_rev_data)
Пример #4
0
def do_all_for_me(matr, bit_words_machine):
    """
    Run every step needed to compress a matrix, encoding all three
    vectors of its csc representation.

    Args:
        matr: matrix to be compressed
        bit_words_machine: number of bits in a machine word
    Returns:
        An 11-tuple, in this order:
        matr_shape: shape of the matrix that was compressed
        int_data, int_row_index, int_cum: lists of integers representing
         the huffman coding of the vectors of the csc representation (cum
         holds, for each column, the number of non-zero values, whereas a
         cumulative value is usually used)
        d_rev_data, d_rev_row_index, d_rev_cum: dicts encoded --> element
        expected_c: number of columns in the matrix (``len(cum)``)
        min_length_encoded_d/r/c: minimum length of the huffman encodings
         for data, row_index and cum respectively
    """
    data, row_index, cum = convert_dense_to_csc(matr)

    dicts = huffman_sparse_encoded_dict(data, row_index, cum)
    (d_data, d_rev_data, d_row_index, d_rev_row_index, d_cum,
     d_rev_cum) = dicts

    encoded = encoded_matrix(
        data, d_data, d_rev_data,
        row_index, d_row_index, d_rev_row_index,
        cum, d_cum, d_rev_cum)
    data_encoded, row_index_encoded, cum_encoded = encoded

    # Same conversion for each encoded vector, in the order
    # data, row_index, cum.
    int_data, int_row_index, int_cum = [
        huffman.convert_bin_to_int(
            huffman.make_words_list_to_int(bits, bit_words_machine))
        for bits in (data_encoded, row_index_encoded, cum_encoded)
    ]

    # Minimum code lengths, evaluated in the order cum, data, row_index.
    min_length_encoded_c, min_length_encoded_d, min_length_encoded_r = [
        huffman.min_len_string_encoded(rev_dict)
        for rev_dict in (d_rev_cum, d_rev_data, d_rev_row_index)
    ]

    return (
        matr.shape,
        int_data,
        int_row_index,
        int_cum,
        d_rev_data,
        d_rev_row_index,
        d_rev_cum,
        len(cum),
        min_length_encoded_d,
        min_length_encoded_r,
        min_length_encoded_c,
    )
Пример #5
0
    def __init__(self, *args, **kwargs):
        """Create the random fixtures stored on the test instance.

        Sets ``input_x`` and ``matr`` (random integer arrays), the code
        dictionaries ``d`` / ``d_rev`` built from ``matr``, the encoded
        matrix, its machine-word list ``list_bin`` and
        ``min_length_encoded``.
        """
        super(HuffmanTest, self).__init__(*args, **kwargs)

        self.input_x = np.random.randint(1000, size=(1000, 500))
        self.matr = np.random.randint(500, size=(500, 250))

        # Build the code from the element frequencies of the matrix.
        code_pairs = huffman.encode(huffman.dict_elem_freq(self.matr))

        self.d_rev = huffman.reverse_elements_list_to_dict(code_pairs)
        self.d = dict(code_pairs)

        self.encoded = huffman.matrix_with_code(self.matr, self.d,
                                                self.d_rev)

        self.bit_words_machine = 64
        self.list_bin = huffman.make_words_list_to_int(
            self.encoded, self.bit_words_machine)

        self.min_length_encoded = huffman.min_len_string_encoded(self.d_rev)