def encode_header(self) -> str:
    """Build the header bit string describing the Huffman table.

    Layout, in order:

    - the number of distinct characters in ``self.string``, Elias-encoded;
    - then, for every distinct character in sorted order:
        - the fixed-length 8-bit ASCII code of the character
          (from ``convert_to_8_bit_ascii``),
        - the Elias-encoded length of its Huffman codeword,
        - the Huffman codeword itself, read from ``self.huffman_values``.

    Returns:
        All of the pieces above concatenated into one string.
    """
    alphabet = sorted(huffman_coding.unique_chars(self.string))

    pieces = [elias_encoder.encode_single_value(len(alphabet))]
    for symbol in alphabet:
        symbol_bits = self.convert_to_8_bit_ascii(symbol)
        codeword = self.huffman_values[symbol]
        codeword_len_bits = elias_encoder.encode_single_value(len(codeword))
        pieces.extend((symbol_bits, codeword_len_bits, codeword))
    return "".join(pieces)
def test_joined(self):
    """Decode 1..999 in order from one concatenated bit stream."""
    values = range(1, 1000)
    code = "".join(elias_encoder.encode_single_value(v) for v in values)

    for expected in values:
        decoded = elias_decoder.decode_single_value(code)
        # Drop the codeword of the value that should have been read, so
        # the next value sits at the front of the stream.
        consumed = len(elias_encoder.encode_single_value(expected))
        code = code[consumed:]
        assert expected == decoded
def encode_header(code: str) -> str:
    """Serialise the character-frequency table of ``code`` as a header.

    ``unique_chars(code)`` is used as a mapping from each distinct
    character to its frequency. The header is laid out as:

    - the number of distinct characters, Elias-encoded;
    - for every character, in the mapping's iteration order:
        - its frequency, Elias-encoded,
        - its code point (``ord``) as decimal digits, zero-padded to a
          minimum width of three (code points of 1000 or more produce a
          wider field);
    - the total length of ``code``, Elias-encoded.

    Returns:
        The header as a single string.
    """
    frequencies = unique_chars(code)

    parts = [elias_encoder.encode_single_value(len(frequencies))]
    for char in frequencies:
        parts.append(elias_encoder.encode_single_value(frequencies[char]))
        parts.append("{0:0=3d}".format(ord(char)))
    parts.append(elias_encoder.encode_single_value(len(code)))
    return "".join(parts)
def decode(code: str) -> str:
    """Decode a header-plus-payload bit string back into text.

    The input is read front to back:

    1. Elias-encoded number of distinct characters.
    2. For each character: Elias-encoded frequency followed by three
       decimal digits giving the character's code point.
    3. Elias-encoded total number of characters in the payload.
    4. The payload, decoded one character at a time by walking the
       Huffman tree rebuilt from the frequencies.

    Raises:
        IndexError: if the header declares no characters, because the
            tree then has no root.
    """

    def read_elias(bits):
        """Return the leading Elias value of ``bits`` and the rest."""
        value = elias_decoder.decode_single_value(bits)
        # The decoder only returns the value, so re-encode it to learn
        # how many characters the codeword occupied.
        width = len(elias_encoder.encode_single_value(value))
        return value, bits[width:]

    # Header: frequency table.
    symbol_count, code = read_elias(code)
    frequencies = {}
    for _ in range(symbol_count):
        freq, code = read_elias(code)
        char = chr(int(code[0:3]))
        code = code[3:]
        frequencies[char] = freq
    total_chars, code = read_elias(code)

    # Rebuild the Huffman tree. Nodes are pushed one at a time in table
    # order so that the heap layout (and hence tie-breaking between
    # equal nodes) is the same as in a push-by-push construction.
    heap: List[Node] = []
    for char, freq in frequencies.items():
        heapq.heappush(heap, Node(char, freq))

    while len(heap) > 1:
        lighter: Node = heapq.heappop(heap)
        heavier: Node = heapq.heappop(heap)
        parent = Node(
            lighter.char + heavier.char, lighter.count + heavier.count
        )
        parent.left = lighter
        parent.right = heavier
        heapq.heappush(heap, parent)

    # Exactly one node is left: either the last merged parent or, for a
    # single-character table, that character's leaf.
    root = heap[0]

    # Payload: search_for_char's result is indexed here as
    # (matched bits, decoded character).
    decoded = []
    for _ in range(total_chars):
        found = search_for_char(root, "", code, -1)
        decoded.append(found[1])
        code = code[len(found[0]):]
    return "".join(decoded)
def encode_data(self) -> str:
    """Encode the LZSS output for ``self.string`` as a bit string.

    The string is first run through ``LZSSEncoder`` with the instance's
    window and buffer sizes. The result is laid out as:

    - the number of LZSS entries, Elias-encoded;
    - for every entry:
        - if its first field is ``0``: the bit ``"0"`` followed by the
          Elias codes of its second and third fields (presumably offset
          and length -- confirm against the LZSS encoder);
        - otherwise: the bit ``"1"`` followed by the Huffman codeword
          looked up for its second field in ``self.huffman_values``.

    Returns:
        The encoded data section as a single string.
    """
    entries = lzss_encoder_class.LZSSEncoder(
        self.string, self.window_size, self.buffer_size
    ).encode()

    chunks = [elias_encoder.encode_single_value(len(entries))]
    for entry in entries:
        if entry[0] != 0:
            # Literal entry: flag bit plus the Huffman codeword.
            chunks.append("1")
            chunks.append(self.huffman_values[entry[1]])  # type: ignore
            continue

        # Reference entry: flag bit plus two Elias-encoded integers.
        chunks.append("0")
        chunks.append(
            elias_encoder.encode_single_value(entry[1])  # type: ignore
        )
        chunks.append(
            elias_encoder.encode_single_value(entry[2])  # type: ignore
        )
    return "".join(chunks)
def test_from_lecture_slides(self):
    """Check 561 against the codeword given in the lecture slides."""
    actual = elias_encoder.encode_single_value(561)
    assert actual == "00100011000110001"
def test_more_values(self):
    """Check the exact codewords of a selection of small values."""
    # (value, expected codeword) pairs, checked in this order.
    expected_codes = [
        (1, "1"),
        (2, "010"),
        (3, "011"),
        (4, "000100"),
        (5, "000101"),
        (6, "000110"),
        (7, "000111"),
        (8, "0011000"),
        (9, "0011001"),
        (10, "0011010"),
        (15, "0011111"),
        (16, "00000010000"),
    ]
    for value, expected in expected_codes:
        assert elias_encoder.encode_single_value(value) == expected
def test_all(self):
    """Round-trip every value in 1..99999 through encoder and decoder."""
    for value in range(1, 100000):
        codeword = elias_encoder.encode_single_value(value)
        round_tripped = elias_decoder.decode_single_value(codeword)
        assert value == round_tripped
def decode(self) -> str:
    """Decode ``self.code`` and return the reconstructed string.

    The bit string is consumed front to back.

    Header:
        - Elias-encoded number of unique characters.
        - For each unique character: its 8-bit ASCII code, the
          Elias-encoded length of its Huffman codeword, then the
          codeword itself. Each pair is stored in
          ``self.huffman_coding`` (character -> codeword).

    Data:
        - Elias-encoded number of LZSS entries.
        - For each entry, one format bit:
            - ``0``: Elias-encoded offset and length, collected as
              ``(0, offset, length)``;
            - ``1``: a Huffman codeword resolved through
              ``self.search_for_char``, collected as ``(1, char)``.

    The collected entries are handed to ``LZSSDecoder`` and its result
    is returned.

    Raises:
        ValueError: if an entry's format bit is a digit other than 0
            or 1 (a non-digit already raises ``ValueError`` from
            ``int``).
    """

    def read_elias(bits):
        """Return the leading Elias value of ``bits`` and the rest."""
        value = elias_decoder.decode_single_value(bits)
        # The decoder only returns the value, so re-encode it to learn
        # how many characters the codeword occupied.
        width = len(elias_encoder.encode_single_value(value))
        return value, bits[width:]

    code = self.code

    # Header: rebuild the character -> Huffman codeword table.
    no_of_unique, code = read_elias(code)
    for _ in range(no_of_unique):
        unique_char = chr(int(code[0:8], 2))
        code = code[8:]

        length_of_huffman, code = read_elias(code)
        self.huffman_coding[unique_char] = code[0:length_of_huffman]
        code = code[length_of_huffman:]

    # Data: collect the LZSS entries.
    no_of_lzss_encodings, code = read_elias(code)
    lzss_encodings: List[EncodingType] = []
    for _ in range(no_of_lzss_encodings):
        format_bit = int(code[0])
        code = code[1:]

        if format_bit == 0:
            offset, code = read_elias(code)
            length, code = read_elias(code)
            lzss_encodings.append((0, offset, length))
        elif format_bit == 1:
            char, huffman_code = self.search_for_char(code)
            code = code[len(huffman_code):]
            lzss_encodings.append((1, char))
        else:
            raise ValueError(
                f"invalid LZSS format bit {format_bit!r}; expected 0 or 1"
            )

    decoder = lzss_decoder_class.LZSSDecoder(lzss_encodings)
    return decoder.decode()