Пример #1
0
def compress(input_file, output_path):
    """Compress input_file and store the result inside output_path.

    The output file takes the input's base name with its extension
    replaced by ``.huffman``. Its content is a pickled
    ``(frequencies, byte_data)`` tuple, where ``frequencies`` is the
    per-character ``collections.Counter`` of the input text and
    ``byte_data`` is whatever ``utility.get_byte_array`` returns for the
    padded encoded string.

    Args:
        input_file: Path of the text file to compress (opened in text mode).
        output_path: Directory in which the compressed file is created.

    Returns:
        The path of the compressed file that was written.
    """
    # Keep only the base name and swap its extension for '.huffman'.
    input_filename, _ = os.path.splitext(os.path.basename(input_file))
    output_file = os.path.join(output_path, input_filename + '.huffman')

    # The with-statement closes the file on exit; no explicit close() needed.
    with open(input_file, 'r') as f:
        data = f.read()

    # Get frequency table from data and build the coding tree from it.
    frequencies = collections.Counter(data)
    root = build_tree(frequencies)
    encoded_str = utility.get_encoded_str(root, data)
    padded_encoded_str = utility.pad_encoded_str(encoded_str)
    byte_data = utility.get_byte_array(padded_encoded_str)

    with open(output_file, 'wb') as out:
        # Serialize the frequency table alongside the payload
        # (presumably so a decoder can rebuild the same tree -- confirm
        # against the decompress side).
        pickle.dump((frequencies, byte_data), out)
    return output_file
 def test_get_encoded_str(self):
     """Check the bit string utility.get_encoded_str yields for 'ABB'."""
     text = 'ABB'
     leaf_a = Node('A', 1)
     leaf_b = Node('B', 2)
     # Two-leaf tree: the 'B' leaf is passed before the 'A' leaf.
     tree = Node('', 3, leaf_b, leaf_a)
     # Expected mapping per the assertion: 'A' -> '1', 'B' -> '0'.
     assert utility.get_encoded_str(tree, text) == '100'
 def test_get_decoded_str(self):
     """Check that decoding an encoded string returns the original text."""
     text = 'aab'
     leaf_a = Node('a', 2)
     leaf_b = Node('b', 1)
     tree = Node('', 3, leaf_b, leaf_a)
     # Round trip: encode with the tree, then decode with the same tree.
     bits = utility.get_encoded_str(tree, text)
     assert utility.get_decoded_str(tree, bits) == text
Пример #4
0
def compress(input_file, output_path):
    """Compress input_file into a '.fano' file inside output_path.

    The output file takes the input's base name with its extension
    replaced by ``.fano`` and holds a pickled ``(frequencies, byte_array)``
    tuple, where ``frequencies`` is the per-character
    ``collections.Counter`` of the input text.

    Args:
        input_file: Path of the text file to compress (opened in text mode).
        output_path: Directory in which the compressed file is created.

    Returns:
        The path of the compressed file that was written.
    """
    base_name = os.path.basename(input_file)
    stem, _ext = os.path.splitext(base_name)
    output_file = os.path.join(output_path, stem + '.fano')

    with open(input_file, 'r') as source:
        text = source.read()

    # Frequency table -> nodes -> coding tree.
    frequencies = collections.Counter(text)
    root = build_tree(create_nodes_from_frequencies(frequencies))

    # NOTE(review): the returned codes are not used below; the call is kept
    # in case get_codes has side effects on the tree -- confirm.
    utility.get_codes(root)

    # Encode the text, pad it, then pack it for serialization.
    bit_string = utility.get_encoded_str(root, text)
    payload = utility.get_byte_array(utility.pad_encoded_str(bit_string))

    with open(output_file, 'wb') as sink:
        # Serialize the frequency table together with the packed data.
        pickle.dump((frequencies, payload), sink)

    return output_file