def main(args):
    """Compress one file with static canonical Huffman coding.

    Args:
        args: Command-line arguments without the program name. Exactly two
            entries are required: the input file path and the output file
            path.

    Raises:
        SystemExit: With a usage message when ``args`` does not hold exactly
            two entries.
    """
    # Handle command line arguments
    if len(args) != 2:
        sys.exit("Usage: python huffman-compress.py InputFile OutputFile")
    inputfile = args[0]
    outputfile = args[1]

    # Read input file once to compute symbol frequencies.
    # The resulting generated code is optimal for static Huffman coding and
    # also canonical.
    freqs = get_frequencies(inputfile)
    freqs.increment(256)  # EOF symbol gets a frequency of 1
    code = freqs.build_code_tree()
    canoncode = huffmancoding.CanonicalCode(tree=code, symbollimit=257)
    # Replace code tree with canonical one. For each symbol,
    # the code value may change but the code length stays the same.
    code = canoncode.to_code_tree()

    # Read input file again, compress with Huffman coding, and write output
    # file. The input handle is owned by the `with` block so it is released
    # even when opening the output file fails or closing the bit stream
    # raises.
    with open(inputfile, "rb") as inp:
        bitout = huffmancoding.BitOutputStream(open(outputfile, "wb"))
        try:
            write_code_len_table(bitout, canoncode)
            compress(code, inp, bitout)
        finally:
            bitout.close()
def main(args):
    """Compress one file with static canonical Huffman coding.

    Args:
        args: Command-line arguments without the program name. Exactly two
            entries are required: the input file path and the output file
            path.

    Raises:
        SystemExit: With a usage message when ``args`` does not hold exactly
            two entries.
    """
    # Collect the command line arguments.
    if len(args) != 2:
        sys.exit(
            "Usage: python huffman-compress.py ArquivoEntrada ArquivoSaida")
    inputfile = args[0]
    outputfile = args[1]

    # Read the input file to compute the symbol frequencies.
    freqs = get_frequencies(inputfile)
    freqs.increment(256)  # EOF symbol gets a frequency of 1.
    code = freqs.build_code_tree()
    canoncode = huffmancoding.CanonicalCode(tree=code, symbollimit=257)
    code = canoncode.to_code_tree()

    # Read the file again, compress it and write the output file. The input
    # handle is owned by the `with` block so it is released even when opening
    # the output file fails or closing the bit stream raises.
    with open(inputfile, "rb") as inp:
        bitout = huffmancoding.BitOutputStream(open(outputfile, "wb"))
        try:
            write_code_len_table(bitout, canoncode)
            compress(code, inp, bitout)
        finally:
            bitout.close()
def compressor(input_files, output_file):
    """Compress several files into one ``.mhs`` archive with a shared Huffman code.

    Args:
        input_files: Sized, re-iterable collection of input file paths. It is
            traversed twice: once to gather frequencies and once to compress.
        output_file: Output path without extension; ``".mhs"`` is appended.
    """
    # Read every input file once to compute the combined symbol frequencies.
    # The resulting generated code is optimal for static Huffman coding and
    # also canonical.
    freqs = huffmancoding.FrequencyTable([0] * 257)
    files_count = len(input_files)
    for path in input_files:
        get_frequencies(path, freqs)
    freqs.increment(256)  # EOF symbol
    code = freqs.build_code_tree()
    canoncode = huffmancoding.CanonicalCode(
        tree=code, symbollimit=freqs.get_symbol_limit())
    # Replace code tree with canonical one. For each symbol,
    # the code value may change but the code length stays the same.
    code = canoncode.to_code_tree()

    # Read the input files again, compress with Huffman coding, and write the
    # output file. The ExitStack owns every handle, so the inputs are closed
    # even when a later open, the table write or the compression raises.
    # Handles are released in reverse order, so the bit stream is closed last.
    with contextlib.ExitStack() as stack:
        bitout = stack.enter_context(contextlib.closing(
            huffmancoding.BitOutputStream(open(output_file + ".mhs", "wb"))))
        inp = [stack.enter_context(open(filename, "rb"))
               for filename in input_files]
        # Only the base names are passed on to the table writer.
        files_name = [os.path.basename(f.name) for f in inp]
        write_code_len_table(bitout, canoncode, files_name)
        compress(code, inp, bitout, files_count)
def read_code_len_table(bitin):
    """Read the code length table and build the matching canonical code.

    Args:
        bitin: Bit source; one bit is pulled per ``read_no_eof()`` call.

    Returns:
        A ``huffmancoding.CanonicalCode`` built from the 257 lengths read.
    """
    lengths = []
    while len(lengths) < 257:
        # Each length is stored as 8 bits, big endian.
        value = 0
        remaining = 8
        while remaining:
            value = (value << 1) | bitin.read_no_eof()
            remaining -= 1
        lengths.append(value)
    return huffmancoding.CanonicalCode(codelengths=lengths)
def read_code_len_table(bitin):
    """Read the code length table and build the matching canonical code.

    Args:
        bitin: Bit source; one bit is pulled per ``read_no_eof()`` call.

    Returns:
        A ``huffmancoding.CanonicalCode`` built from the 257 lengths read.
    """
    lengths = []
    for _symbol in range(257):
        # Assemble one 8-bit field, most significant bit first (big endian).
        field = 0
        for _bit in range(8):
            field = (field << 1) | bitin.read_no_eof()
        lengths.append(field)
    return huffmancoding.CanonicalCode(codelengths=lengths)
def read_code_len_table(bitin):
    """Read the code length table and build the matching canonical code.

    Args:
        bitin: Bit source; one bit is pulled per ``read_no_eof()`` call.

    Returns:
        A ``huffmancoding.CanonicalCode`` built from the 257 lengths read.
    """
    def next_length():
        # For this file format, each length is 8 bits in big endian.
        value = 0
        for _ in range(8):
            value = (value << 1) | bitin.read_no_eof()
        return value

    lengths = [next_length() for _ in range(257)]
    return huffmancoding.CanonicalCode(codelengths=lengths)
def huffman_compress(code_freq, quant_code, codeword_refer, output_file):
    """Huffman-compress ``quant_code`` into ``output_file`` using canonical codes.

    Args:
        code_freq: Per-symbol frequency counts. The list is left untouched;
            one extra entry with frequency 1 is added to a private copy
            (presumably the EOF symbol -- confirm against ``compress``).
        quant_code: Symbol data handed unchanged to ``compress``.
        codeword_refer: Unused; kept so existing callers keep working.
        output_file: Path of the binary file to write.
    """
    # Build the table from a copy: appending to the caller's list would grow
    # it by one symbol on every call.
    freq = huffmancoding.FrequencyTable([*code_freq, 1])
    code_tree = freq.build_code_tree()
    # Replace code tree with canonical one. For each symbol, the code value
    # may change but the code length stays the same.
    canonical_code = huffmancoding.CanonicalCode(
        tree=code_tree, symbollimit=freq.get_symbol_limit())
    code_tree = canonical_code.to_code_tree()

    with contextlib.closing(
            huffmancoding.BitOutputStream(open(output_file, "wb"))) as bit_out:
        write_code_len_table(bit_out, canonical_code)
        compress(code_tree, quant_code, bit_out)