Пример #1
0
def generate_tree_general(node_lst: List[ReadNode],
                          root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes nothing about the order of the tree nodes in the list.

    A child whose type is 1 is an internal node and its data field is an
    index into <node_lst>; any other type is treated as a leaf whose data
    field is the symbol.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 1, 1, 0)]
    >>> generate_tree_general(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(10, None, None), \
HuffmanTree(12, None, None)), \
HuffmanTree(None, HuffmanTree(5, None, None), HuffmanTree(7, None, None)))
    """
    root_node = node_lst[root_index]

    def _subtree(child_type: int, child_data: int) -> HuffmanTree:
        # Internal children are expanded by the recursive module helper;
        # everything else becomes a leaf carrying the symbol.
        if child_type == 1:
            return __generate_tree_gen_help(node_lst, node_lst[child_data])
        return HuffmanTree(child_data, None, None)

    result = HuffmanTree(None, None, None)
    result.left = _subtree(root_node.l_type, root_node.l_data)
    result.right = _subtree(root_node.r_type, root_node.r_data)
    return result
Пример #2
0
def _get_min(symbol_list: List[Union[int, HuffmanTree]], freq_list: List[int]) \
        -> Tuple[List[Union[int, HuffmanTree]], List[int]]:
    """
    A helper function for build_huffman_tree that merges the two entries of
    lowest frequency.

    <symbol_list> and <freq_list> are parallel lists and are mutated in place:
    twice over, the entry holding the minimum frequency (first occurrence on
    ties) is removed from both lists. Removed int symbols are wrapped in leaf
    HuffmanTrees; removed HuffmanTrees are used as they are. A new internal
    HuffmanTree over the two removed entries is appended to <symbol_list> and
    the sum of their frequencies is appended to <freq_list>.

    Returns the same two (mutated) lists.
    """
    picked = []
    combined_freq = 0
    for _ in range(2):
        # Locate the first entry carrying the smallest frequency.
        lowest = min(freq_list)
        position = freq_list.index(lowest)
        combined_freq += lowest

        # Drop the entry from both parallel lists.
        entry = symbol_list.pop(position)
        del freq_list[position]

        if isinstance(entry, int):
            picked.append(HuffmanTree(entry))
        elif isinstance(entry, HuffmanTree):
            picked.append(entry)

    # The first entry removed becomes the left child, the second the right.
    symbol_list.append(HuffmanTree(None, picked[0], picked[1]))
    freq_list.append(combined_freq)

    return symbol_list, freq_list
Пример #3
0
def main(input_file='/home/zsa/bird2.bmp', output_file='/home/zsa/ttt'):
    """Encode an image into the project's JPEG-like format.

    The image at <input_file> is read with OpenCV, resized to 448x448,
    converted to YCrCb, split into 8x8 blocks, and each block is transformed
    (dct_2d), quantized and zigzag-ordered. Huffman tables are then built for
    the DC/AC coefficients of the luminance and chrominance channels and
    everything is handed to write_to_file.

    Both arguments default to the paths that used to be hard-coded, so
    calling main() with no arguments behaves as before.

    Raises:
        OSError: if OpenCV cannot read <input_file>.
        ValueError: if the image sides are not multiples of 8.
    """
    image = cv2.imread(input_file)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise OSError("could not read image: " + str(input_file))
    image = cv2.resize(image, (448, 448))
    # NOTE(review): cv2.imread yields BGR channel order while the conversion
    # below is the RGB variant. Left unchanged because a matching decoder may
    # rely on the same convention -- confirm before switching.
    ycbcr = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)

    npmat = np.array(ycbcr, dtype=np.uint8)

    rows, cols = npmat.shape[0], npmat.shape[1]

    # block size: 8x8
    if rows % 8 == cols % 8 == 0:
        blocks_count = (rows // 8) * (cols // 8)
    else:
        raise ValueError(("the width and height of the image "
                          "should both be mutiples of 8"))

    # dc is the top-left cell of the block, ac are all the other cells
    dc = np.empty((blocks_count, 3), dtype=np.int32)
    ac = np.empty((blocks_count, 63, 3), dtype=np.int32)

    # Centre the data range on zero: [0, 255] --> [-128, 127]. This must be
    # done on a signed copy; subtracting 128 from uint8 data wraps around.
    centered = npmat.astype(np.int32) - 128

    block_index = 0
    for i in range(0, rows, 8):
        for j in range(0, cols, 8):
            for k in range(3):
                block = centered[i:i+8, j:j+8, k]

                dct_matrix = dct_2d(block)
                # channel 0 uses the luminance table, the others chrominance
                quant_matrix = quantize(dct_matrix,
                                        'lum' if k == 0 else 'chrom')
                zz = block_to_zigzag(quant_matrix)

                dc[block_index, k] = zz[0]
                ac[block_index, :, k] = zz[1:]
            block_index += 1

    # Separate Huffman trees for luminance/chrominance DC and AC data.
    H_DC_Y = HuffmanTree(np.vectorize(bits_required)(dc[:, 0]))
    H_DC_C = HuffmanTree(np.vectorize(bits_required)(dc[:, 1:].flat))
    H_AC_Y = HuffmanTree(
            flatten(run_length_encode(ac[i, :, 0])[0]
                    for i in range(blocks_count)))
    H_AC_C = HuffmanTree(
            flatten(run_length_encode(ac[i, :, j])[0]
                    for i in range(blocks_count) for j in [1, 2]))

    tables = {'dc_y': H_DC_Y.value_to_bitstring_table(),
              'ac_y': H_AC_Y.value_to_bitstring_table(),
              'dc_c': H_DC_C.value_to_bitstring_table(),
              'ac_c': H_AC_C.value_to_bitstring_table()}

    write_to_file(output_file, dc, ac, blocks_count, tables)
Пример #4
0
def generate_tree_general(node_lst: List[ReadNode],
                          root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes nothing about the order of the tree nodes in the list.

    A child of type 0 is a leaf whose data field is the symbol; any other
    type means the data field is the index in <node_lst> of the child node,
    which is built recursively.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 1, 1, 0)]
    >>> generate_tree_general(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(10, None, None), \
HuffmanTree(12, None, None)), \
HuffmanTree(None, HuffmanTree(5, None, None), HuffmanTree(7, None, None)))
    >>> lst = [ReadNode(1, 2, 1, 3), ReadNode(0, 4, 0, 12), \
    ReadNode(1, 1, 0, 2), ReadNode(0, 9, 0, 10)]
    >>> generate_tree_general(lst, 0)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(None, \
HuffmanTree(4, None, None), HuffmanTree(12, None, None)), \
HuffmanTree(2, None, None)), HuffmanTree(None, HuffmanTree(9, None, None), \
HuffmanTree(10, None, None)))
    """
    node = node_lst[root_index]

    # Build the left child first, then the right, as two separate values.
    if node.l_type == 0:
        left_child = HuffmanTree(node.l_data)
    else:
        left_child = generate_tree_general(node_lst, node.l_data)

    if node.r_type == 0:
        right_child = HuffmanTree(node.r_data)
    else:
        right_child = generate_tree_general(node_lst, node.r_data)

    result = HuffmanTree()
    result.left = left_child
    result.right = right_child
    return result
Пример #5
0
def main(input_file, output_file):
    """Encode the image at <input_file> and write the result to <output_file>.

    The image is opened with PIL, converted to YCbCr and split into 8x8
    blocks. Every block of every channel is centred on zero, transformed
    (dct_2d), quantized and turned into a coefficient sequence whose first
    value is stored as the DC term and whose remaining 63 values are stored
    as AC terms. Huffman tables are built separately for 'dc_y', 'ac_y',
    'dc_c' and 'ac_c' and passed to write_to_file with the coefficients.

    Raises:
        ValueError: if the image width or height is not a multiple of 8.
    """
    image = Image.open(input_file)

    ycbcr = image.convert('YCbCr')

    npmat = np.array(ycbcr, dtype=np.uint8)

    rows, cols = npmat.shape[0], npmat.shape[1]

    # block size: 8x8
    if rows % 8 == cols % 8 == 0:
        blocks_count = (rows // 8) * (cols // 8)
    else:
        raise ValueError(("the width and height of the image "
                          "should both be mutiples of 8"))

    # dc is the top-left cell of the block, ac are all the other cells
    dc = np.empty((blocks_count, 3), dtype=np.int32)
    ac = np.empty((blocks_count, 63, 3), dtype=np.int32)

    # Centre the data range on zero ([0, 255] --> [-128, 127]) on a signed
    # copy; subtracting 128 directly from uint8 data wraps around instead.
    centered = npmat.astype(np.int32) - 128

    block_index = 0
    for i in range(0, rows, 8):
        for j in range(0, cols, 8):
            for k in range(3):
                block = centered[i:i + 8, j:j + 8, k]
                # discrete cosine block transform
                dct_matrix = dct_2d(block)
                # block quantization (channel 0 is luminance)
                quant_matrix = quantize(dct_matrix,
                                        'lum' if k == 0 else 'chrom')
                # get an array of coefficients
                zz = zigzag_traversal(block_to_zigzag(quant_matrix))

                dc[block_index, k] = zz[0]
                ac[block_index, :, k] = zz[1:]
            block_index += 1

    # create huffman trees separately for 'dc_y', 'ac_y', 'dc_c', 'ac_c'
    H_DC_Y = HuffmanTree(np.vectorize(bits_required)(dc[:, 0]))
    H_DC_C = HuffmanTree(np.vectorize(bits_required)(dc[:, 1:].flat))
    H_AC_Y = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, 0])[0] for i in range(blocks_count)))
    H_AC_C = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, j])[0] for i in range(blocks_count)
            for j in [1, 2]))
    # final tables for blocks
    tables = {
        'dc_y': H_DC_Y.value_to_bitstring_table(),
        'ac_y': H_AC_Y.value_to_bitstring_table(),
        'dc_c': H_DC_C.value_to_bitstring_table(),
        'ac_c': H_AC_C.value_to_bitstring_table()
    }

    write_to_file(output_file, dc, ac, blocks_count, tables)
Пример #6
0
    def make_tree(self, counter):
        """Build a HuffmanTree from <counter> and store it as self.tree.

        <counter> maps each symbol to its frequency. One leaf is created per
        symbol, then the two trees picked by self.lowest are repeatedly
        merged until a single tree remains. A counter with a single symbol
        yields that lone leaf as the tree.

        Afterwards self.dict holds the mapping built from the tree's
        get_codes() pairs (symbol -> code), and the inverted mapping
        (code -> symbol) is passed to the tree's read_dict.

        Returns the finished tree.

        Raises:
            ValueError: if <counter> is empty.
        """
        if not counter:
            raise ValueError(
                "cannot build a Huffman tree from an empty counter")

        forest = [HuffmanTree(symbol=sym, freq=counter[sym])
                  for sym in counter]

        while len(forest) > 1:
            lowest = self.lowest(forest)
            forest.remove(lowest)
            next_low = self.lowest(forest)
            forest.remove(next_low)
            forest.append(HuffmanTree(right=lowest,
                                      left=next_low,
                                      freq=lowest.freq + next_low.freq))

        # Taking the survivor here also covers the one-symbol case, where
        # the merge loop never runs.
        self.tree = forest[0]

        # Materialize the codes once; they feed both mappings.
        codes = list(self.tree.get_codes())
        self.dict = dict(codes)
        self.tree.read_dict({pair[1]: pair[0] for pair in codes})
        return self.tree
Пример #7
0
def _post_order_helper(node_lst: List[ReadNode],
                       root_index: int, flag: bool = True,
                       right_index: int = 0) -> HuffmanTree:
    """
    A helper function that generates a tree based on <node_lst> ReadNodes,
    and uses <root_index> and <flag> and <right_index> to do so.

    <flag> selects which child of node_lst[root_index] is built: the left
    child when true, the right child otherwise. <right_index> is only used
    for an internal left child, where it is subtracted from the child's
    position in the list.
    """
    node = node_lst[root_index]
    if flag:
        child_type, child_data, skip = node.l_type, node.l_data, right_index
    else:
        child_type, child_data, skip = node.r_type, node.r_data, 0

    # Internal child: its position is derived from root_index, not from the
    # data field.
    if child_type == 1:
        tree = HuffmanTree(None)
        tree.number = root_index - 1 - skip

        tree.right = _post_order_helper(node_lst, tree.number, False)

        # _find_height yields None or a sized value; its length becomes the
        # offset used when building the left subtree.
        measured = _find_height(tree.right)
        offset = 0 if measured is None else len(measured)

        tree.left = _post_order_helper(node_lst, tree.number, True, offset)
        return tree

    # Leaf child: the data field is the symbol.
    if child_type == 0:
        return HuffmanTree(child_data)

    return HuffmanTree(None)
Пример #8
0
def build_huffman_tree(freq_dict: Dict[int, int]) -> HuffmanTree:
    """ Return the Huffman tree corresponding to the frequency dictionary
    <freq_dict>.

    Precondition: freq_dict is not empty.

    An empty dictionary nevertheless yields HuffmanTree(None). A dictionary
    with a single symbol yields a root whose left child is that symbol and
    whose right child is a dummy leaf holding a different byte value,
    (symbol + 1) % 256, so the two leaves can never hold the same symbol.

    >>> freq = {2: 6, 3: 4}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> t == result
    True
    >>> freq = {2: 6, 3: 4, 7: 5}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(2), \
                             HuffmanTree(None, HuffmanTree(3), HuffmanTree(7)))
    >>> t == result
    True
    >>> import random
    >>> symbol = random.randint(0,255)
    >>> freq = {symbol: 6}
    >>> t = build_huffman_tree(freq)
    >>> any_valid_byte_other_than_symbol = (symbol + 1) % 256
    >>> dummy_tree = HuffmanTree(any_valid_byte_other_than_symbol)
    >>> result = HuffmanTree(None, HuffmanTree(symbol), dummy_tree)
    >>> t.left == result.left or t.right == result.left
    True
    >>> freq = {2: 6, 3: 4, 7: 5, 8: 4}
    >>> tree = build_huffman_tree(freq)
    >>> result =  HuffmanTree(None, HuffmanTree(None, \
    HuffmanTree(3, None, None), HuffmanTree(8, None, None)), \
    HuffmanTree(None, HuffmanTree(7, None, None), HuffmanTree(2, None, None)))
    >>> tree == result
    True
    >>> freq = {3: 1}
    >>> tree = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3, None, None), HuffmanTree(4, \
    None, None))
    >>> tree == result
    True
    >>> freq = {2: 1}
    >>> tree = build_huffman_tree(freq)
    >>> tree == HuffmanTree(None, HuffmanTree(2), HuffmanTree(3))
    True
    """
    # Empty dictionary
    if not freq_dict:
        return HuffmanTree(None)

    # Only one item in freq_dict: pair it with a dummy leaf. The dummy must
    # differ from the real symbol, so it is derived from it rather than
    # being a fixed constant.
    if len(freq_dict) == 1:
        symbol = next(iter(freq_dict))
        dummy = (symbol + 1) % 256
        return HuffmanTree(None, HuffmanTree(symbol), HuffmanTree(dummy))

    symbol_list = list(freq_dict)
    freq_list = list(freq_dict.values())
    # Each _get_min call merges the two rarest entries into one tree.
    while len(symbol_list) > 1:
        symbol_list, freq_list = _get_min(symbol_list, freq_list)
    return symbol_list[0]
Пример #9
0
def JPEG_encoder(input_image):
    """Compute the JPEG-style coefficients and Huffman tables of an image.

    <input_image> is converted with cv2.COLOR_RGB2YCrCb, split into 8x8
    blocks, and every block of every channel is centred on zero, transformed
    (dct2), quantized and zigzag-ordered. The first coefficient of each
    block is stored as DC, the remaining 63 as AC.

    Returns the tuple (dc, ac, blocks_count, tables) where dc has shape
    (blocks_count, 3), ac has shape (blocks_count, 63, 3) and tables maps
    'dc_y', 'ac_y', 'dc_c', 'ac_c' to value-to-bitstring tables.

    Raises:
        ValueError: if the image height or width is not a multiple of 8.
    """
    imsize = input_image.shape

    image = cv2.cvtColor(input_image, cv2.COLOR_RGB2YCrCb)

    rows, cols = imsize[0], imsize[1]
    # Partial blocks cannot be encoded; fail early with a clear message
    # instead of an indexing error deep inside the loop.
    if rows % 8 != 0 or cols % 8 != 0:
        raise ValueError("the width and height of the image "
                         "should both be multiples of 8")
    blocks_count = (rows // 8) * (cols // 8)

    # dc is the top-left cell of the block, ac are all the other cells
    dc = np.empty((blocks_count, 3), dtype=np.int32)
    ac = np.empty((blocks_count, 63, 3), dtype=np.int32)

    # Centre the data range on zero ([0, 255] --> [-128, 127]) on a signed
    # copy; with unsigned 8-bit input a direct "- 128" would wrap around.
    centered = np.asarray(image).astype(np.int32) - 128

    block_index = 0
    for i in range(0, rows, 8):
        for j in range(0, cols, 8):
            for k in range(3):
                block = centered[i:i + 8, j:j + 8, k]
                # Block based DCT
                dct_matrix = dct2(block)
                # Quantization (channel 0 is luminance)
                quant_matrix = quantize(dct_matrix,
                                        'lum' if k == 0 else 'chrom')
                zz = block_to_zigzag(quant_matrix)

                dc[block_index, k] = zz[0]
                ac[block_index, :, k] = zz[1:]
            block_index += 1

    # Huffman
    H_DC_Y = HuffmanTree(np.vectorize(bits_required)(dc[:, 0]))
    H_DC_C = HuffmanTree(np.vectorize(bits_required)(dc[:, 1:].flat))
    H_AC_Y = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, 0])[0] for i in range(blocks_count)))
    H_AC_C = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, j])[0] for i in range(blocks_count)
            for j in [1, 2]))

    tables = {
        'dc_y': H_DC_Y.value_to_bitstring_table(),
        'ac_y': H_AC_Y.value_to_bitstring_table(),
        'dc_c': H_DC_C.value_to_bitstring_table(),
        'ac_c': H_AC_C.value_to_bitstring_table()
    }

    return dc, ac, blocks_count, tables
Пример #10
0
def __generate_tree_gen_help(node_lst: list, a: ReadNode) -> HuffmanTree:
    """Helper for generate tree general.

    Return the subtree described by ReadNode <a>. A child of type 0 is a
    leaf holding the child's data as its symbol; otherwise the data is the
    index in <node_lst> of the child's own ReadNode, which is expanded
    recursively.
    """
    subtree = HuffmanTree(None, None, None)
    subtree.left = (
        HuffmanTree(a.l_data, None, None) if a.l_type == 0
        else __generate_tree_gen_help(node_lst, node_lst[a.l_data]))
    subtree.right = (
        HuffmanTree(a.r_data, None, None) if a.r_type == 0
        else __generate_tree_gen_help(node_lst, node_lst[a.r_data]))
    return subtree
Пример #11
0
def build_huffman_tree(freq_dict: Dict[int, int]) -> HuffmanTree:
    """ Return the Huffman tree corresponding to the frequency dictionary
    <freq_dict>.

    Precondition: freq_dict is not empty.

    A dictionary with a single symbol yields a root whose left child is that
    symbol and whose right child is None. Otherwise the two entries chosen
    by find_smallest_dict are merged repeatedly until one tree remains.

    >>> freq = {2: 6, 3: 4}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> t == result
    True
    >>> freq = {2: 6, 3: 4, 7: 5}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(2), \
                            HuffmanTree(None, HuffmanTree(3), HuffmanTree(7)))
    >>> t == result
    True
    >>> import random
    >>> symbol = random.randint(0,255)
    >>> freq = {symbol: 6}
    >>> t = build_huffman_tree(freq)
    >>> any_valid_byte_other_than_symbol = (symbol + 1) % 256
    >>> dummy_tree = HuffmanTree(any_valid_byte_other_than_symbol)
    >>> result = HuffmanTree(None, HuffmanTree(symbol), dummy_tree)
    >>> t.left == result.left or t.right == result.right
    True
    """
    if len(freq_dict) == 1:
        only_symbol = next(iter(freq_dict))
        return HuffmanTree(None, HuffmanTree(only_symbol), None)

    # Maps a key to [frequency, tree]; starts with one leaf per symbol.
    nodes = create_leafs(freq_dict)
    while len(nodes) != 1:
        i, j = find_smallest_dict(nodes)
        merged = HuffmanTree(None, nodes[i][1], nodes[j][1])
        combined_freq = nodes[i][0] + nodes[j][0]
        # The key of a merged node must be unique. Plain concatenation is
        # not: 1 + 23 and 12 + 3 both give "123", which would overwrite a
        # live subtree. A separator keeps the keys distinct.
        merged_key = str(i) + "|" + str(j)
        nodes[merged_key] = [combined_freq, merged]
        nodes.pop(i)
        nodes.pop(j)

    # Exactly one entry is left: the finished tree.
    return next(iter(nodes.values()))[1]
Пример #12
0
    def __init__(self, file_name, args):
        """Load the corpus file and prepare the vocabulary structures.

        The file <file_name> inside args.dir is read as UTF-8 lines, then
        get_words and init_sample_table are run. When args.using_hs is
        truthy a HuffmanTree is built from self.word_frequency and its codes
        and paths are stored in huffman_positive / huffman_negative.
        Finally a short summary is printed.
        """
        self.args = args
        # queue storing all word pairs
        self.word_pair_catch = deque()
        # sampling table
        self.sample_table = []
        # all words left after dropping those rarer than mini_count
        self.sentence_length = 0
        # number of sentences
        self.sentence_count = 0
        # word --> id
        self.word2id = {}
        # id --> word
        self.id2word = {}
        # word frequency
        self.word_frequency = {}
        # number of words after deduplication and low-frequency removal
        self.word_count = 0
        # Read inside a context manager so the file handle is always closed.
        with open(os.path.join(self.args.dir, file_name),
                  encoding='utf-8') as corpus:
            self.input_file = corpus.readlines()

        self.get_words()
        self.init_sample_table()

        if args.using_hs:
            tree = HuffmanTree(self.word_frequency)
            self.huffman_positive, self.huffman_negative = tree.get_huffman_code_and_path(
            )

        print('Word Count: %d' % len(self.word2id))
        print('Sentence Length: %d' % (self.sentence_length))
        print('Sentence count: %d' % (self.sentence_count))
Пример #13
0
 def build_huffman_tree(self):
     """Build the Huffman tree of the vocabulary and store its paths.

     Every value returned by get_vocab() is indexed as info[0] -> info[1] to
     form the mapping handed to HuffmanTree. The tree is kept in
     self.huffman and the pair returned by
     generate_node_left_and_right_path() in self.huffman_left and
     self.huffman_right.
     """
     frequencies = {}
     for info in self.get_vocab().values():
         frequencies[info[0]] = info[1]
     self.huffman = HuffmanTree(frequencies)
     paths = self.huffman.generate_node_left_and_right_path()
     self.huffman_left, self.huffman_right = paths
     print("build_tree_complete")
Пример #14
0
def build_huffman_tree(freq_dict: Dict[int, int]) -> HuffmanTree:
    """ Return the Huffman tree corresponding to the frequency dictionary
    <freq_dict>.

    Precondition: freq_dict is not empty.

    A single symbol is paired with a dummy leaf holding (symbol + 1) % 256.
    Otherwise the two entries of lowest frequency are merged repeatedly;
    ties are resolved by symbol for the initial leaves and by creation order
    for merged nodes.

    >>> freq = {2: 6, 3: 4}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(3), HuffmanTree(2))
    >>> t == result
    True
    >>> freq = {2: 6, 3: 4, 7: 5}
    >>> t = build_huffman_tree(freq)
    >>> result = HuffmanTree(None, HuffmanTree(2), \
                             HuffmanTree(None, HuffmanTree(3), HuffmanTree(7)))
    >>> t == result
    True
    >>> import random
    >>> symbol = random.randint(0,255)
    >>> freq = {symbol: 6}
    >>> t = build_huffman_tree(freq)
    >>> any_valid_byte_other_than_symbol = (symbol + 1) % 256
    >>> dummy_tree = HuffmanTree(any_valid_byte_other_than_symbol)
    >>> result = HuffmanTree(None, HuffmanTree(symbol), dummy_tree)
    >>> t.left == result.left or t.right == result.left
    True
    """
    # NOTE(review): the original signature carried a bare "FIX THIS" marker;
    # what it referred to is not visible from this function.
    import heapq

    if len(freq_dict) == 1:
        symbol = next(iter(freq_dict))
        dummy = (symbol + 1) % 256
        return HuffmanTree(None, HuffmanTree(symbol), HuffmanTree(dummy))

    # Heap entries are (frequency, order, tree). The unique order value
    # settles ties, so HuffmanTree objects are never compared with "<", and
    # equal frequencies come out in insertion order.
    by_freq = sorted((freq, symbol) for symbol, freq in freq_dict.items())
    heap = [(freq, order, HuffmanTree(symbol))
            for order, (freq, symbol) in enumerate(by_freq)]
    heapq.heapify(heap)
    next_order = len(heap)

    while len(heap) > 1:
        freq_a, _, tree_a = heapq.heappop(heap)
        freq_b, _, tree_b = heapq.heappop(heap)
        merged = HuffmanTree(None, tree_a, tree_b)
        heapq.heappush(heap, (freq_a + freq_b, next_order, merged))
        next_order += 1

    root = heap[0][2]
    __huffman_helper(root)
    return root
Пример #15
0
def generate_tree_postorder(node_lst: List[ReadNode],
                            root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes that the list represents a tree in postorder.

    The right subtree is built first; the size reported for it by
    _find_height is then used as the offset for locating the left subtree.
    The finished tree is passed through _post_order_set_none before being
    returned.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 0, 1, 0)]
    >>> generate_tree_postorder(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(5, None, None), \
HuffmanTree(7, None, None)), \
HuffmanTree(None, HuffmanTree(10, None, None), HuffmanTree(12, None, None)))
    """
    root = HuffmanTree(None)
    root.right = _post_order_helper(node_lst, root_index, False)

    # _find_height yields None or a sized value; its length is the offset.
    measured = _find_height(root.right)
    offset = 0 if measured is None else len(measured)

    root.left = _post_order_helper(node_lst, root_index, True, offset)

    _post_order_set_none(root)
    return root
Пример #16
0
 def __init__(self, file_name, min_count):
     """Read the corpus and build the sampling table and Huffman codes.

     get_words(file_name, min_count) runs first; the word-pair caches are
     then created, init_sample_table is run, and a HuffmanTree built from
     self.word_frequency provides huffman_positive and huffman_negative.
     Progress information is printed along the way.
     """
     self.get_words(file_name, min_count)
     print(" ")
     self.cbow_count = []
     self.word_pair_catch, self.cbow_word_pair_catch = deque(), deque()
     self.init_sample_table()
     huffman = HuffmanTree(self.word_frequency)
     print("tree ", huffman)
     codes_and_paths = huffman.get_huffman_code_and_path()
     self.huffman_positive, self.huffman_negative = codes_and_paths
     print('Word Count: %d' % len(self.word2id))
     print('Sentence Length: %d' % (self.sentence_length))
Пример #17
0
def generate_tree_general(node_lst: List[ReadNode],
                          root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes nothing about the order of the tree nodes in the list.

    A child of type 0 is a leaf whose data field is the symbol; any other
    type means the data field is the index in <node_lst> of the child node.
    An empty <node_lst> yields HuffmanTree(None).

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 1, 1, 0)]
    >>> generate_tree_general(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(10, None, None), \
HuffmanTree(12, None, None)), \
HuffmanTree(None, HuffmanTree(5, None, None), HuffmanTree(7, None, None)))
    """
    if len(node_lst) == 0:
        return HuffmanTree(None)

    node = node_lst[root_index]

    if node.l_type == 0:  # left side is a leaf
        left = HuffmanTree(node.l_data, None, None)
    else:  # left side is an internal node stored at index l_data
        left = generate_tree_general(node_lst, node.l_data)

    if node.r_type == 0:  # right side is a leaf
        right = HuffmanTree(node.r_data, None, None)
    else:  # right side is an internal node stored at index r_data
        right = generate_tree_general(node_lst, node.r_data)

    return HuffmanTree(None, left, right)
Пример #18
0
def generate_tree_postorder(node_lst: List[ReadNode],
                            root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes that the list represents a tree in postorder.

    Because of the postorder layout, the index fields of internal children
    are not consulted: an internal right child sits directly before its
    parent, and an internal left child sits directly before the whole right
    subtree (or before the parent when the right child is a leaf).

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 0, 1, 0)]
    >>> generate_tree_postorder(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(5, None, None), \
HuffmanTree(7, None, None)), \
HuffmanTree(None, HuffmanTree(10, None, None), HuffmanTree(12, None, None)))
    """
    def _build(index: int):
        # Return (subtree rooted at node_lst[index], index of the node that
        # immediately precedes that whole subtree in the list).
        node = node_lst[index]
        previous = index - 1

        if node.r_type == 0:
            right = HuffmanTree(node.r_data, None, None)
        else:
            right, previous = _build(previous)

        if node.l_type == 0:
            left = HuffmanTree(node.l_data, None, None)
        else:
            left, previous = _build(previous)

        return HuffmanTree(None, left, right), previous

    return _build(root_index)[0]
Пример #19
0
    def __init__(self, input_file, min_count):
        """Set up the vocabulary containers and the Huffman split.

        After the containers are initialised, get_words(min_count) is run
        and a HuffmanTree built from self.word_frequency supplies
        huffman_positive and huffman_negative via divide_pos_and_neg().
        """
        self.input_file = input_file

        # Lookup tables between words and ids.
        self.word2id = {}
        self.id2word = {}
        # Word frequency: how often each word occurs in the random-walk
        # sequences.
        self.word_frequency = {}
        # Purpose unclear; the original comment only asked "what is this?".
        self.word_pair_catch = deque()

        # Total number of words seen across all sentences.
        self.sentence_sum_length = 0
        # Number of random walks.
        self.sentence_count = 0
        self.word_count = 0

        self.get_words(min_count)
        huffman = HuffmanTree(self.word_frequency)
        pos_and_neg = huffman.divide_pos_and_neg()
        self.huffman_positive, self.huffman_negative = pos_and_neg
Пример #20
0
def _gen_tree_helper(node_lst: List[ReadNode],
                     root_index: int, flag: bool = True) -> HuffmanTree:
    """
    A helper function that generates a tree based on <node_lst> ReadNodes,
    and uses <root_index> and <flag> to do so.

    <flag> selects which child of node_lst[root_index] is built: the left
    child when true, the right child otherwise.
    """
    node = node_lst[root_index]
    if flag:
        child_type, child_data = node.l_type, node.l_data
    else:
        child_type, child_data = node.r_type, node.r_data

    # Internal child: the data field is the index of the child's ReadNode.
    if child_type == 1:
        tree = HuffmanTree(None)
        tree.number = child_data
        tree.left = _gen_tree_helper(node_lst, tree.number, True)
        tree.right = _gen_tree_helper(node_lst, tree.number, False)
        return tree

    # Leaf child: the data field is the symbol.
    if child_type == 0:
        return HuffmanTree(child_data)

    return HuffmanTree(None)
Пример #21
0
def create_leafs(freq_dict: Dict[int, int]) -> Dict[int, list]:
    """ Given a frequency dictionary, make a leaf for every symbol and return
    a dictionary that maps each symbol to the list [frequency, leaf].
    Helper for build_huffman_tree
    >>> leaves = create_leafs({2: 32, 3: 34})
    >>> leaves[2] == [32, HuffmanTree(2)]
    True
    >>> leaves[3] == [34, HuffmanTree(3)]
    True
    """
    return {symbol: [freq_dict[symbol], HuffmanTree(symbol)]
            for symbol in freq_dict}
Пример #22
0
def huffman_encoding(text):
    """Encode <text> with a Huffman tree grown from it.

    Returns the tuple (encoded, tree) where encoded is the concatenation of
    the codes of all characters of <text> and tree is the HuffmanTree used.

    Raises:
        ValueError: if <text> is empty.
        KeyError: if a character of <text> has no code in the tree mapping.
    """
    if not text:
        raise ValueError("Text is empty. Abort encoding.")

    tree = HuffmanTree()
    tree.grow(text).encode()
    char_code = tree.get_mapping()

    pieces = []
    for char in text:
        # Only a missing mapping entry is translated; any other error is
        # left to propagate unchanged.
        try:
            code = char_code[char]
        except KeyError as err:
            raise KeyError(
                f"{char} does not exist in the mapping. Abort encoding."
            ) from err
        pieces.append(code)

    # Joining once avoids rebuilding the result string for every character.
    return "".join(pieces), tree
Пример #23
0
    def read_message_file(self):
        """Decompress the binary file self.filename into self.text.

        The file's bytes are expanded into one bit string via
        binary.EightBitNumToBinary. A fresh HuffmanTree is loaded with
        self.dictionary and stored as self.tree, then self.fileLength
        symbols are decoded with the tree's find_char method. Each
        find_char result is used as (decoded text, number of bits consumed).
        """
        # The context manager closes the file even if reading fails.
        with open(self.filename, "rb") as f:
            raw_bytes = f.read()

        bit_string = "".join(
            binary.EightBitNumToBinary(byte) for byte in raw_bytes)

        self.tree = HuffmanTree()
        self.tree.read_dict(self.dictionary)

        decoded = []
        for _ in range(self.fileLength):
            # One lookup per symbol provides both the text and the length.
            found = self.tree.find_char(bit_string)
            decoded.append(found[0])
            bit_string = bit_string[found[1]:]
        self.text = "".join(decoded)
Пример #24
0
 def __init__(self,
              segWordPath,
              window=5,
              vecLen=400,
              minValue=0,
              learnRate=0.001):
     '''
     Set up the model from a word-segmented corpus.

     segWordPath: path to the word-segmented file
     window, vecLen, learnRate: stored unchanged on the instance
     minValue: accepted but not used here

     The word dictionary comes from WordCounter(segWordPath).wordDict and is
     handed, together with vecLen, to HuffmanTree; the tree's root and
     HuffmanDict are kept on the instance.
     '''
     # Plain settings.
     self.segPath = segWordPath
     self.window = window
     self.vecLen = vecLen
     self.learnRate = learnRate
     self.wordVec = defaultdict()

     # Vocabulary and the Huffman tree built from it.
     self.wordDict = WordCounter(segWordPath).wordDict
     huffman = HuffmanTree(self.wordDict, vecLen)
     self.treeRoot = huffman.root
     self.HuffmanDict = huffman.HuffmanDict
Пример #25
0
def generate_tree_postorder(node_lst: List[ReadNode],
                            root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes that the list represents a tree in postorder.

    Child type 0 marks a leaf whose data field is the symbol; any other type
    marks an internal child. Because of the postorder layout, an internal
    right child sits directly before its parent, and an internal left child
    sits directly before the whole right subtree (or before the parent when
    the right child is a leaf). The index fields of internal children are
    therefore not consulted.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 0, 1, 0)]
    >>> generate_tree_postorder(lst, 2)
    HuffmanTree(None, HuffmanTree(None, HuffmanTree(5, None, None), \
HuffmanTree(7, None, None)), \
HuffmanTree(None, HuffmanTree(10, None, None), HuffmanTree(12, None, None)))
    """
    # 0 is leaf, 1 is not a leaf
    def _build(index: int):
        # Return (subtree rooted at node_lst[index], index of the node that
        # immediately precedes that whole subtree in the list).
        node = node_lst[index]
        previous = index - 1

        if node.r_type == 0:
            right = HuffmanTree(node.r_data, None, None)
        else:
            right, previous = _build(previous)

        if node.l_type == 0:
            left = HuffmanTree(node.l_data, None, None)
        else:
            left, previous = _build(previous)

        return HuffmanTree(None, left, right), previous

    return _build(root_index)[0]
Пример #26
0
def generate_tree_general(node_lst: List[ReadNode],
                          root_index: int) -> HuffmanTree:
    """ Return the Huffman tree corresponding to node_lst[root_index].
    The function assumes nothing about the order of the tree nodes in the list.

    Both children of the root are produced by _gen_tree_helper, called once
    for the left side and once for the right side.

    >>> lst = [ReadNode(0, 5, 0, 7), ReadNode(0, 10, 0, 12), \
    ReadNode(1, 1, 1, 0)]
    >>> tree = generate_tree_general(lst, 2)
    >>> result = HuffmanTree(None,
    ...     HuffmanTree(None, HuffmanTree(10, None, None),
    ...                 HuffmanTree(12, None, None)),
    ...     HuffmanTree(None, HuffmanTree(5, None, None),
    ...                 HuffmanTree(7, None, None)))
    >>> result == tree
    True
    """
    root = HuffmanTree(None)
    # Left first, then right, matching the helper's flag convention.
    for side, build_left in (("left", True), ("right", False)):
        setattr(root, side, _gen_tree_helper(node_lst, root_index, build_left))
    return root
Пример #27
0
def test_huffman_internal(symbols, expected_sizes):
    """Build a Huffman code for ``symbols`` and print a pass/fail report.

    Two properties of ``HuffmanTree(symbols).codebook`` are checked, in order:

    1. Prefix-freeness: no codeword may start with the codeword of a
       *different* symbol. Codewords are compared by position rather than by
       value, so two symbols that were assigned the same codeword are
       reported as a failure instead of silently slipping through.
    2. Size: for every entry of ``expected_sizes`` the codeword length must
       equal the expected length.

    The function stops at the first failing property.

    Args:
        symbols: the source handed unchanged to ``HuffmanTree``.
        expected_sizes: mapping of symbol -> expected codeword length.

    Returns:
        None. The outcome is reported on standard output only.

    Note:
        A symbol listed in ``expected_sizes`` but absent from the codebook is
        not handled here; the lookup error (``KeyError`` for a plain dict)
        propagates to the caller.
    """
    print("Running test on source ", symbols)

    tree = HuffmanTree(symbols)
    codebook = tree.codebook

    # Materialise the codewords once so each one has a stable position.
    codes = [code for _, code in codebook.items()]

    # Compare every ordered pair of *distinct entries*. Filtering by value
    # (code != other) would skip duplicated codewords, which are the most
    # blatant violation of prefix-freeness.
    is_prefix_free = not any(
        code.startswith(other)
        for idx, code in enumerate(codes)
        for other_idx, other in enumerate(codes)
        if idx != other_idx
    )

    if not is_prefix_free:
        print("  FAILED: Code was not prefix free")
        print("  Your codebook:", codebook)
        return

    # Check expected sizes of encodings; all() stops at the first mismatch.
    is_optimal_size = all(
        len(codebook[symbol]) == expected
        for symbol, expected in expected_sizes.items()
    )

    if not is_optimal_size:
        print("  FAILED: Code was not optimal")
        print("  Your codebook:", codebook)
        return

    print("  Test passed", codebook)
Пример #28
0
def __generate_tree_postorder_helper(node_lst: list) -> HuffmanTree:
    """Recursively assemble a HuffmanTree from a list of node records.

    The list is scanned from its end for the last record whose ``l_type`` and
    ``r_type`` are both 0 (presumably "both children are leaves" -- confirm
    against the ReadNode definition). The records before that position form
    the left side; the records from that position up to, but excluding, the
    final record form the right side.

    A side consisting of exactly one record becomes an internal node whose
    two leaves carry that record's ``l_data`` and ``r_data``; any other side
    is built by recursing on it. If no such record exists, a tree with no
    children is returned.
    """

    def build_side(records):
        # A single record is expanded directly into a node with two leaves.
        if len(records) == 1:
            only = records[0]
            return HuffmanTree(None,
                               HuffmanTree(only.l_data, None, None),
                               HuffmanTree(only.r_data, None, None))
        return __generate_tree_postorder_helper(records)

    result = HuffmanTree(None, None, None)

    # Index of the last record with two type-0 children, or None.
    split_at = next(
        (idx for idx in reversed(range(len(node_lst)))
         if node_lst[idx].l_type == 0 and node_lst[idx].r_type == 0),
        None)
    if split_at is None:
        return result

    before = node_lst[:split_at]
    after = node_lst[split_at:-1]
    result.left = build_side(before)
    result.right = build_side(after)
    return result
Пример #29
0
def main():
    """Run a lossy JPEG-style encode/decode round trip on one image.

    The image path is read from the command line. The image is converted to
    YCbCr and cropped to the largest top-left region whose height and width
    are multiples of 8. Each 8x8 block of each channel is then transformed,
    quantized and zigzag-scanned into DC/AC coefficient arrays. Huffman
    tables are built from those coefficients (they are only computed; nothing
    is written to a bitstream here). Finally the coefficients are dequantized
    and inverse-transformed, and the result is saved next to the input as
    ``<name>_opti_by_pkikani.<ext>``.

    Pixels outside the 8-aligned region are not encoded; they keep their
    original values in the output.

    Relies on names defined elsewhere in the file: ``Image``, ``np``,
    ``fftpack``, ``img2arr``, ``quantize``, ``dequantize``,
    ``block_to_zigzag``, ``zigzag_to_block``, ``bits_required``,
    ``run_length_encode``, ``flatten`` and ``HuffmanTree``.
    """
    # Function-scope import so the file's top-level import block is untouched.
    import os

    start = datetime.datetime.now()
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="path to the input image")
    args = parser.parse_args()

    # Split at the LAST dot of the final path component. Scanning for the
    # first "." in the whole path mis-parses inputs such as "./img.png" or
    # "photo.v2.jpg".
    base_name, dotted_ext = os.path.splitext(args.input)
    exte = dotted_ext[1:]
    print("exte : ", exte)

    image = Image.open(args.input)
    or_img = img2arr(image)
    print("original image shape : ", or_img.shape)

    # Full-size YCbCr pixels; kept so the un-encoded border can be preserved.
    ycbcr_full = np.array(image.convert('YCbCr'), dtype=np.uint8)
    orows, ocols = ycbcr_full.shape[0], ycbcr_full.shape[1]
    print("old shape : ", orows, " * ", ocols)

    # Crop (not reshape) to the largest 8-aligned region.
    rows = orows // 8 * 8
    cols = ocols // 8 * 8
    npmat = ycbcr_full[0:rows, 0:cols, :]
    print("new shape : ", npmat.shape[0], " * ", npmat.shape[1])

    print(rows / 8, cols / 8, int(rows / 8), int(cols / 8))
    blocks_count = (rows // 8) * (cols // 8)
    print("blocks_count : ", blocks_count)

    # dc is the top-left cell of the block, ac are all the other cells
    dc = np.empty((blocks_count, 3), dtype=np.int32)
    ac = np.empty((blocks_count, 63, 3), dtype=np.int32)
    print("rows", rows, " cols ", cols)

    block_index = 0
    for i in range(0, rows, 8):
        for j in range(0, cols, 8):
            for k in range(3):
                # Centre the data range on zero: [0, 255] --> [-128, 127].
                # The cast must come first: subtracting 128 from uint8 data
                # wraps around instead of going negative.
                block = npmat[i:i + 8, j:j + 8, k].astype(np.float64) - 128

                # NOTE(review): with SciPy's default axis this transforms
                # along the last axis only; the inverse below mirrors it.
                dct_matrix = fftpack.dct(block, norm='ortho')
                quant_matrix = quantize(dct_matrix,
                                        'lum' if k == 0 else 'chrom')
                zz = block_to_zigzag(quant_matrix)

                dc[block_index, k] = zz[0]
                ac[block_index, :, k] = zz[1:]
            block_index += 1

    H_DC_Y = HuffmanTree(np.vectorize(bits_required)(dc[:, 0]))
    H_DC_C = HuffmanTree(np.vectorize(bits_required)(dc[:, 1:].flat))
    H_AC_Y = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, 0])[0] for i in range(blocks_count)))
    H_AC_C = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, j])[0] for i in range(blocks_count)
            for j in [1, 2]))

    # Built for parity with the encoder; not consumed further in this
    # function because no bitstream is written.
    tables = {
        'dc_y': H_DC_Y.value_to_bitstring_table(),
        'ac_y': H_AC_Y.value_to_bitstring_table(),
        'dc_c': H_DC_C.value_to_bitstring_table(),
        'ac_c': H_AC_C.value_to_bitstring_table()
    }

    print("ENCODING DONE................................................")
    print("time passed : ", ((datetime.datetime.now() - start).seconds) / 60,
          " minutes")

    # Start from the original YCbCr pixels so the strips outside the
    # 8-aligned region hold real image data rather than uninitialised memory.
    decoded = ycbcr_full.copy()
    print("rows : ", rows, " cols : ", cols)

    block_index = 0
    for i in range(0, rows, 8):
        for j in range(0, cols, 8):
            for c in range(3):
                zigzag = [dc[block_index, c]] + list(ac[block_index, :, c])
                quant_matrix = zigzag_to_block(zigzag)
                dct_matrix = dequantize(quant_matrix,
                                        'lum' if c == 0 else 'chrom')
                block = fftpack.idct(dct_matrix, norm='ortho')
                # Quantization can push values slightly outside [0, 255];
                # round and clip so the uint8 store cannot wrap around.
                decoded[i:i + 8, j:j + 8, c] = np.clip(
                    np.rint(block + 128), 0, 255)
            block_index += 1

    image = Image.fromarray(decoded, 'YCbCr')
    image = image.convert('RGB')

    print("DONE. time passed : ",
          ((datetime.datetime.now() - start).seconds) / 60, " minutes")
    output_file = base_name + "_opti_by_pkikani." + exte
    image.save(output_file)
Пример #30
0
                q, quant_matrix = quantize(dct_matrix, scale, m, n)

                # =========================
                # 부호화
                # =========================
                zz = block_to_zigzag(quant_matrix)

                dc[block_index, k] = zz[0]
                ac[block_index, :, k] = zz[1:]

    pprint(q)

    # =========================
    # 허프만 부호화
    # =========================
    H_DC_Y = HuffmanTree(np.vectorize(bits_required)(dc[:, 0]))
    H_DC_C = HuffmanTree(np.vectorize(bits_required)(dc[:, 1:].flat))
    H_AC_Y = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, 0])[0] for i in range(blocks_count)))
    H_AC_C = HuffmanTree(
        flatten(
            run_length_encode(ac[i, :, j])[0] for i in range(blocks_count)
            for j in [1, 2]))

    tables = {
        'dc_y': H_DC_Y.value_to_bitstring_table(),
        'ac_y': H_AC_Y.value_to_bitstring_table(),
        'dc_c': H_DC_C.value_to_bitstring_table(),
        'ac_c': H_AC_C.value_to_bitstring_table()
    }