Пример #1
0
    def encode_header(self) -> str:
        """
        Build the header bit string for ``self.string``.

        Layout of the returned string:
        - the number of distinct characters, as a variable-length Elias code;
        - then, for every distinct character in sorted order:
        -- the fixed-length 8-bit ASCII code from ``convert_to_8_bit_ascii``,
        -- the Elias code of the length of the character's Huffman codeword,
        -- the Huffman codeword itself, looked up in ``self.huffman_values``.
        """
        ordered_chars = sorted(huffman_coding.unique_chars(self.string))

        pieces = [elias_encoder.encode_single_value(len(ordered_chars))]
        for symbol in ordered_chars:
            ascii_bits = self.convert_to_8_bit_ascii(symbol)
            codeword = self.huffman_values[symbol]
            length_bits = elias_encoder.encode_single_value(len(codeword))
            pieces.extend((ascii_bits, length_bits, codeword))

        return "".join(pieces)
Пример #2
0
 def test_joined(self):
     """Decode a concatenation of the codes for 1..999 one value at a time."""
     stream = "".join(
         elias_encoder.encode_single_value(n) for n in range(1, 1000)
     )
     for expected in range(1, 1000):
         actual = elias_decoder.decode_single_value(stream)
         # Drop the codeword that was just decoded from the front.
         stream = stream[len(elias_encoder.encode_single_value(expected)):]
         assert expected == actual
Пример #3
0
def encode_header(code: str) -> str:
    """Return the header describing the character frequencies of ``code``.

    The header consists of, in order:
    - the Elias code of the number of distinct characters;
    - for each distinct character, the Elias code of its frequency followed
      by its code point written in decimal, zero-padded to a minimum of
      three digits;
    - the Elias code of the total number of characters, ``len(code)``.
    """
    frequencies = unique_chars(code)

    parts = [elias_encoder.encode_single_value(len(frequencies))]
    for symbol in frequencies:
        parts.append(elias_encoder.encode_single_value(frequencies[symbol]))
        # Minimum width is three digits; larger code points are not truncated.
        parts.append("{0:0=3d}".format(ord(symbol)))
    parts.append(elias_encoder.encode_single_value(len(code)))

    return "".join(parts)
Пример #4
0
def decode(code: str) -> str:
    """Decode a header-plus-payload string back into the original text.

    The input is consumed from the front in this order:
    1. the Elias-coded number of distinct characters;
    2. for each distinct character, its Elias-coded frequency followed by
       three decimal digits holding its code point;
    3. the Elias-coded total number of characters;
    4. the payload, which is decoded one character at a time by walking the
       tree rebuilt from the frequencies with ``search_for_char``.
    """
    def read_elias(bits: str):
        """Return the leading Elias value and ``bits`` with its code removed."""
        value = elias_decoder.decode_single_value(bits)
        # Only the value comes back from the decoder, so it is re-encoded to
        # learn how many characters its codeword occupied.
        width = len(elias_encoder.encode_single_value(value))
        return value, bits[width:]

    no_of_unique, code = read_elias(code)

    frequencies = {}
    for _ in range(no_of_unique):
        count, code = read_elias(code)
        symbol = chr(int(code[:3]))
        code = code[3:]
        frequencies[symbol] = count

    total_chars, code = read_elias(code)

    # Push nodes one by one, in dictionary order, so ties are broken the same
    # way every time the tree is built.
    heap: List[Node] = []
    for symbol, count in frequencies.items():
        heapq.heappush(heap, Node(symbol, count))

    # Merge the two nodes popped first until a single tree remains.
    while len(heap) > 1:
        first: Node = heapq.heappop(heap)
        second: Node = heapq.heappop(heap)
        parent = Node(first.char + second.char, first.count + second.count)
        parent.left = first
        parent.right = second
        heapq.heappush(heap, parent)
    root = heap[0]

    decoded = []
    for _ in range(total_chars):
        found = search_for_char(root, "", code, -1)
        decoded.append(found[1])
        # found[0] is the matched codeword; skip past it.
        code = code[len(found[0]):]

    return "".join(decoded)
Пример #5
0
    def encode_data(self) -> str:
        """Encode the data section from the LZSS encoding of ``self.string``.

        The result starts with the Elias code of the number of LZSS entries.
        Each entry then contributes either:
        - ``"0"`` followed by the Elias codes of its second and third fields
          (presumably offset and length -- confirm against the LZSS encoder);
        - ``"1"`` followed by the Huffman codeword of its second field,
          looked up in ``self.huffman_values``.
        """
        lzss_entries = lzss_encoder_class.LZSSEncoder(
            self.string, self.window_size, self.buffer_size).encode()
        chunks = [elias_encoder.encode_single_value(len(lzss_entries))]

        for entry in lzss_entries:
            if entry[0] != 0:
                # Literal entry: flag bit plus the character's codeword.
                chunks.append("1")
                chunks.append(
                    self.huffman_values[entry[1]]  # type: ignore
                )
                continue

            # Entry with two numeric fields, each written as an Elias code.
            chunks.append("0")
            chunks.append(
                elias_encoder.encode_single_value(
                    entry[1]  # type: ignore
                )
            )
            chunks.append(
                elias_encoder.encode_single_value(
                    entry[2]  # type: ignore
                )
            )

        return "".join(chunks)
Пример #6
0
 def test_from_lecture_slides(self):
     """Check the encoding of 561 against the lecture-slide example."""
     encoded = elias_encoder.encode_single_value(561)
     assert encoded == "00100011000110001"
Пример #7
0
 def test_more_values(self):
     """Check the codewords of several small values, in ascending order."""
     expected_codes = (
         (1, "1"),
         (2, "010"),
         (3, "011"),
         (4, "000100"),
         (5, "000101"),
         (6, "000110"),
         (7, "000111"),
         (8, "0011000"),
         (9, "0011001"),
         (10, "0011010"),
         (15, "0011111"),
         (16, "00000010000"),
     )
     for value, expected in expected_codes:
         assert elias_encoder.encode_single_value(value) == expected
Пример #8
0
 def test_all(self):
     """Round-trip every value from 1 to 99999 through encoder and decoder."""
     for value in range(1, 100000):
         round_tripped = elias_decoder.decode_single_value(
             elias_encoder.encode_single_value(value)
         )
         assert value == round_tripped
Пример #9
0
    def decode(self) -> str:
        """Decode ``self.code`` and return the text rebuilt by the LZSS decoder.

        The header is read first and stores one Huffman codeword per character
        in ``self.huffman_coding``. The data section is then parsed into a
        list of LZSS tuples, which is handed to ``LZSSDecoder``.

        Raises:
            Exception: if a format bit parses to an integer other than 0 or 1.
        """
        def take_elias(bits: str):
            """Return the leading Elias value and ``bits`` without its code."""
            value = elias_decoder.decode_single_value(bits)
            # Re-encode the value to find out how wide its codeword was.
            width = len(elias_encoder.encode_single_value(value))
            return value, bits[width:]

        remaining = self.code

        # ---- Header ----
        # Number of unique ASCII characters.
        unique_count, remaining = take_elias(remaining)
        for _ in range(unique_count):
            # Fixed-length 8-bit ASCII code of the character.
            symbol = chr(int(remaining[:8], 2))
            remaining = remaining[8:]

            # Elias-coded length of the character's Huffman codeword,
            # followed by the codeword itself.
            codeword_len, remaining = take_elias(remaining)
            codeword = remaining[:codeword_len]
            remaining = remaining[codeword_len:]

            self.huffman_coding[symbol] = codeword

        # ---- Data ----
        encoding_count, remaining = take_elias(remaining)

        lzss_encodings: List[EncodingType] = []
        for _ in range(encoding_count):
            # The first bit of each entry selects its format.
            format_bit = int(remaining[0])
            remaining = remaining[1:]

            if format_bit == 1:
                # Literal character stored as its Huffman codeword.
                char, huffman_code = self.search_for_char(remaining)
                remaining = remaining[len(huffman_code):]
                lzss_encodings.append((1, char))
            elif format_bit == 0:
                # Back-reference: Elias-coded offset, then Elias-coded length.
                offset, remaining = take_elias(remaining)
                length, remaining = take_elias(remaining)
                lzss_encodings.append((0, offset, length))
            else:
                raise Exception

        return lzss_decoder_class.LZSSDecoder(lzss_encodings).decode()