def compressed_index_to_file_elias_gamma(index, out_file_name):
    """Write ``index`` to ``<out_file_name>_elias_gamma`` using Elias gamma codes.

    One record is written per ``(k, v)`` item of ``index``.  Every integer
    is packed with ``struct`` format ``"I"`` (unsigned int, native byte
    order and size), so the file is only portable between machines that
    share that layout.  Record layout:

    1. length in bytes of ``k`` encoded as UTF-8
    2. the UTF-8 bytes of ``k``
    3. ``v[0]``
    4. ``count`` -- the number of 32-bit words that follow
    5. ``count`` words holding the concatenated gamma codes of ``i + 1``
       for each ``i`` in ``v[1]``, right-padded with zero bits up to a
       multiple of 32

    Args:
        index: mapping from a text key to a sequence ``v`` where ``v[0]``
            is an integer that fits an unsigned int and ``v[1]`` is an
            iterable of integers.  NOTE(review): presumably a term ->
            (frequency, postings) inverted index -- confirm with callers.
        out_file_name: path prefix; ``"_elias_gamma"`` is appended to it.

    Returns:
        None.
    """
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("Compressing with elias gamma...")
    from kbp.univ import elias
    import struct

    file_name = out_file_name + "_elias_gamma"
    # The context manager closes the file even if encoding or packing raises.
    with open(file_name, "wb") as f:
        for k, v in index.items():
            encoded_key = k.encode("utf-8")
            f.write(struct.pack("I", len(encoded_key)))
            f.write(encoded_key)
            f.write(struct.pack("I", v[0]))

            # Values are shifted by one before encoding; presumably so that
            # 0 can be stored, since gamma codes cover positive integers.
            bits = "".join(elias.gamma_encode(i + 1) for i in v[1])

            # Pad with zero bits to a whole number of 32-bit words.
            remainder = len(bits) % 32
            if remainder:
                bits += "0" * (32 - remainder)

            # Floor division keeps ``count`` an int on Python 3 as well.
            count = len(bits) // 32
            f.write(struct.pack("I", count))
            for start in range(0, len(bits), 32):
                f.write(struct.pack("I", int(bits[start:start + 32], 2)))
def compressed_index_to_file_elias_gamma(index, out_file_name):
    """Serialise ``index`` into ``out_file_name + "_elias_gamma"``.

    For each ``(k, v)`` item of ``index`` the function appends, in order:

    * the byte length of ``k`` as UTF-8, packed as ``'I'``
    * the UTF-8 bytes of ``k``
    * ``v[0]``, packed as ``'I'``
    * the number of 32-bit words in the payload, packed as ``'I'``
    * the payload words, each packed as ``'I'``

    The payload is the concatenation of ``elias.gamma_encode(i + 1)`` for
    every ``i`` in ``v[1]``, padded on the right with ``"0"`` bits until
    its length is a multiple of 32.  ``'I'`` without a byte-order prefix
    means native byte order and size, so readers must run on a compatible
    platform.

    Args:
        index: mapping of text keys to sequences whose element 0 is an
            integer and whose element 1 is an iterable of integers.
            NOTE(review): the meaning of ``v[0]`` is not visible here --
            confirm against the code that builds the index.
        out_file_name: output path without the ``_elias_gamma`` suffix.

    Returns:
        None.
    """
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Compressing with elias gamma...")
    from kbp.univ import elias
    import struct

    file_name = out_file_name + "_elias_gamma"
    # ``with`` guarantees the handle is released if anything below raises.
    with open(file_name, 'wb') as f:
        for k, v in index.items():
            key_bytes = k.encode("utf-8")
            f.write(struct.pack('I', len(key_bytes)))
            f.write(key_bytes)
            f.write(struct.pack('I', v[0]))

            # Encode i + 1 rather than i; presumably because gamma coding
            # has no code for zero.
            entries = "".join(elias.gamma_encode(i + 1) for i in v[1])

            # Round the bit string up to a multiple of 32 with zero bits.
            leftover = len(entries) % 32
            if leftover != 0:
                entries += "0" * (32 - leftover)

            # ``//`` keeps the word count an integer under Python 3 too.
            count = len(entries) // 32
            numbers = [int(entries[idx * 32:(idx + 1) * 32], 2)
                       for idx in range(count)]

            f.write(struct.pack('I', count))
            for number in numbers:
                f.write(struct.pack('I', number))
# Kabopan - Readable Algorithms. Public Domain, 2007-2009
#
# Self-checks for the Elias universal codes in kbp.univ.elias.
# Each decoder result is compared against a (value, bits consumed) pair.

from kbp.univ.elias import (
    elias_split,
    gamma_encode,
    gamma_decode,
    interleaved_gamma_encode,
    interleaved_gamma_decode,
    delta_encode,
    delta_decode,
    omega_encode,
    omega_decode,
)

# elias_split: expected results are an (int, bit string) pair.
assert elias_split(1) == (0, "")
assert elias_split(14) == (3, "110")

# Gamma coding.
assert gamma_encode(1) == "1"
assert gamma_encode(2) == "010"
assert gamma_encode(14) == "0001" + "110"

# Trailing bits after a complete code word must not change the result.
for gamma_bits in ("000010001", "00001000100"):
    assert gamma_decode(gamma_bits) == (17, 9)

# Interleaved gamma coding.
assert interleaved_gamma_encode(14) == "101001"
assert interleaved_gamma_decode("101001") == (14, 6)

# Delta coding.
assert delta_encode(17) == "001010001"
assert delta_decode("001010001") == (17, 9)

# Omega coding: expected code words for 1..17, in order.
expected_omega_codes = [
    '0',
    '100',
    '110',
    '101000',
    '101010',
    '101100',
    '101110',
    '1110000',
    '1110010',
    '1110100',
    '1110110',
    '1111000',
    '1111010',
    '1111100',
    '1111110',
    '10100100000',
    '10100100010',
]
assert [omega_encode(i) for i in range(1, 18)] == expected_omega_codes

# As with gamma, extra bits after the code word are ignored.
for omega_bits in ('10100100010', '1010010001000'):
    assert omega_decode(omega_bits) == (17, 11)