def do_all_for_me(matr, bit_words_machine):
    """Run every step needed to compress a matrix, encoding only its data.

    The matrix is converted to its CSC representation, Huffman code tables
    are built for the ``data`` vector, the vector is encoded, and the
    encoded stream is packed into integers. ``row_index`` and ``cum`` are
    returned exactly as produced by the CSC conversion.

    Args:
        matr: matrix to be compressed; its ``shape`` attribute is read.
        bit_words_machine: number of bits in a machine word, forwarded to
            the word-packing step.

    Returns:
        A tuple ``(matr_shape, int_data, d_rev_data, row_index, cum,
        expected_c, min_length_encoded)`` where:

        matr_shape: shape of the matrix that was compressed.
        int_data: list of integers representing the Huffman encoding of
            the data vector of the CSC representation.
        d_rev_data: dict mapping encoded --> element.
        row_index: vector of the row indices of the CSC representation.
        cum: vector of the number of elements of each column.
        expected_c: ``len(cum)``, i.e. the number of columns in the matrix.
        min_length_encoded: minimum length of the Huffman encodings.
    """
    values, row_index, cum = sparse_huffman.convert_dense_to_csc(matr)

    # Build the code tables for the data vector, then encode it with them.
    encode_table, d_rev_data = huffman_sparse_encoded_dict(values)
    encoded_bits = encoded_matrix(values, encode_table, d_rev_data)

    # Pack the encoded stream into machine words, then into integers.
    packed_words = huffman.make_words_list_to_int(
        encoded_bits, bit_words_machine)
    int_data = huffman.convert_bin_to_int(packed_words)

    return (
        matr.shape,
        int_data,
        d_rev_data,
        row_index,
        cum,
        len(cum),
        huffman.min_len_string_encoded(d_rev_data),
    )
def test_code_decode_with_zeros_columns(self):
    """Encode then decode a random sparse matrix with two all-zero columns.

    Each of the three CSC vectors (data, row indices, cum) is encoded and
    packed into integers; the test passes when decoding them reproduces
    the original matrix element for element.
    """
    rows, cols = 50, 10
    prune_prob = 0.7  # probability that an entry is "pruned" (zeroed out)
    keep = np.random.choice(
        a=[False, True], size=(rows, cols), p=[prune_prob, 1 - prune_prob])
    matr = np.random.randint(100, size=(rows, cols)) * (1 * keep)
    # Force columns 3 and 6 to contain only zeros.
    for empty_col in (3, 6):
        matr[:, empty_col] = 0

    def pack(encoded):
        # Pack an encoded stream into machine words, then into integers.
        words = huffman.make_words_list_to_int(
            encoded, self.bit_words_machine)
        return huffman.convert_bin_to_int(words)

    data, row_index, cum = sparse_huffman.convert_dense_to_csc(matr)

    (d_data, d_rev_data,
     d_row_index, d_rev_row_index,
     d_cum, d_rev_cum) = sparse_huffman.huffman_sparse_encoded_dict(
        data, row_index, cum)

    data_bits, row_index_bits, cum_bits = sparse_huffman.encoded_matrix(
        data, d_data, d_rev_data,
        row_index, d_row_index, d_rev_row_index,
        cum, d_cum, d_rev_cum)

    int_data = pack(data_bits)
    int_row_index = pack(row_index_bits)
    int_cum = pack(cum_bits)

    expected_c = len(cum)
    decoded = sparse_huffman.sparsed_encoded_to_dense(
        matr.shape, int_data, int_row_index, int_cum,
        d_rev_data, d_rev_row_index, d_rev_cum,
        self.bit_words_machine, expected_c)

    round_trip_ok = np.all(decoded == matr)
    self.assertTrue(round_trip_ok)
def __init__(self, *args, **kwargs):
    """Create the random fixtures stored on the test instance.

    Builds a random input array and a random sparse matrix, converts the
    matrix to CSC form, Huffman-encodes only its data vector, and stores
    the packed integers together with the values derived from them
    (``expected_c`` and ``min_length_encoded``).
    """
    super(SparseHuffmanOnlyDataTest, self).__init__(*args, **kwargs)

    rows, cols = 500, 100
    prune_prob = 0.7  # probability that an entry is "pruned" (zeroed out)
    self.bit_words_machine = 64

    # The random draws are kept in this order: input, mask, matrix values.
    self.input_x = np.random.randint(1000, size=(70, rows))
    keep = np.random.choice(
        a=[False, True], size=(rows, cols), p=[prune_prob, 1 - prune_prob])
    self.matr = np.random.randint(500, size=(rows, cols)) * (1 * keep)

    csc_parts = sparse_huffman.convert_dense_to_csc(self.matr)
    self.data, self.row_index, self.cum = csc_parts

    code_tables = sparse_huffman_only_data.huffman_sparse_encoded_dict(
        self.data)
    self.d_data, self.d_rev_data = code_tables

    encoded_bits = sparse_huffman_only_data.encoded_matrix(
        self.data, self.d_data, self.d_rev_data)
    packed_words = huffman.make_words_list_to_int(
        encoded_bits, self.bit_words_machine)
    self.int_data = huffman.convert_bin_to_int(packed_words)

    self.expected_c = len(self.cum)
    self.min_length_encoded = huffman.min_len_string_encoded(
        self.d_rev_data)