def a2b_l(cs, lengthinbits):
    """
    Decode base-32 encoded data of a known bit length.

    @param cs the base-32 encoded string to decode
    @param lengthinbits the number of bits of data encoded into cs

    a2b_l() will return a result big enough to hold lengthinbits bits.  So
    for example if cs is 4 characters long (encoding at least 15 and up to
    20 bits) and lengthinbits is 16, then a2b_l() will return a string of
    length 2 (since 2 bytes is sufficient to store 16 bits).  If cs is 4
    characters long and lengthinbits is 20, then a2b_l() will return a
    string of length 3 (since 3 bytes is sufficient to store 20 bits).

    Please see the warning in the docstring of b2a_l() regarding the use
    of b2a() versus b2a_l().

    @return the data encoded in cs
    """
    precondition(could_be_base32_encoded_l(cs, lengthinbits),
                 "cs is required to be possibly base32 encoded data.",
                 cs=cs, lengthinbits=lengthinbits)

    # Map each base-32 character to its 5-bit value ("quintet").
    qs = [ord(v) for v in string.translate(cs, c2vtranstable)]

    numoctets = div_ceil(lengthinbits, 8)
    # Append zero quintets for padding if needed, so the repacking loop
    # below always has enough input to emit numoctets whole octets.
    # (Fix: dropped the unused locals `numquintetsofdata` and `readybits`
    # that the original computed and never read.)
    numquintetsneeded = div_ceil(numoctets * 8, 5)
    qs.extend([0] * (numquintetsneeded - len(qs)))

    # Repack the stream of 5-bit quintets into 8-bit octets.  `num` is a
    # big-integer accumulator of pending bits; `pos` is the place value at
    # which the next quintet gets inserted.
    octets = []
    pos = 2048
    num = qs[0] * pos
    i = 1
    while len(octets) < numoctets:
        # Pull in quintets until at least one full octet is available.
        while pos > 256:
            pos = pos / 32
            num = num + (qs[i] * pos)
            i = i + 1
        # Peel off the top 8 bits as the next output octet.
        octet = num / 256
        octets.append(octet)
        num = num - (octet * 256)
        num = num * 256
        pos = pos * 256
    assert len(octets) == numoctets, "len(octets): %s, numoctets: %s, octets: %s" % (len(octets), numoctets, octets,)
    res = ''.join([chr(o) for o in octets])
    # Round-trip check: cs must be the canonical encoding of the result.
    precondition(b2a_l(res, lengthinbits) == cs, "cs is required to be the canonical base-32 encoding of some data.", b2a(res), res=res, cs=cs)
    return res
def recursive_subset_sum(entropy_needed, wordlists):
    """
    Pick a minimalish list of (entropy, wordlist) pairs whose entropies
    sum to at least entropy_needed.

    wordlists is assumed to be sorted by per-word entropy, smallest last...
    biggest last; the last entry's entropy must be a float.
    """
    biggest_entropy = wordlists[-1][0]
    assert isinstance(biggest_entropy, float), wordlists[-1]
    # Smallest number of words that could possibly supply the required
    # entropy, if every word were drawn from the biggest wordlist.
    word_count = div_ceil(entropy_needed, biggest_entropy)
    # Entropy each of those words must contribute on average.
    per_word = entropy_needed / word_count
    # Smallest wordlist offering at least that much entropy per word.
    # (Falls through to the biggest list; per_word never exceeds its
    # entropy, so the break below always fires by the last entry.)
    chosen_entropy, chosen_list = wordlists[-1]
    for candidate in wordlists:
        if candidate[0] >= per_word:
            chosen_entropy, chosen_list = candidate
            break
    assert chosen_entropy >= per_word, (chosen_entropy, per_word)
    picks = [(chosen_entropy, chosen_list)]
    # One word chosen; recurse for whatever entropy remains.
    if chosen_entropy < entropy_needed:
        picks.extend(recursive_subset_sum(entropy_needed - chosen_entropy, wordlists))
    return picks
def a2b_l(cs, lengthinbits):
    """
    @param lengthinbits the number of bits of data in encoded into cs

    a2b_l() will return a result big enough to hold lengthinbits bits.  So
    for example if cs is 4 characters long (encoding at least 15 and up to
    20 bits) and lengthinbits is 16, then a2b_l() will return a string of
    length 2 (since 2 bytes is sufficient to store 16 bits).  If cs is 4
    characters long and lengthinbits is 20, then a2b_l() will return a
    string of length 3 (since 3 bytes is sufficient to store 20 bits).

    Please see the warning in the docstring of b2a_l() regarding the use
    of b2a() versus b2a_l().

    @return the data encoded in cs
    """
    precondition(could_be_base32_encoded_l(cs, lengthinbits), "cs is required to be possibly base32 encoded data.", cs=cs, lengthinbits=lengthinbits)
    # Map each base-32 character to its 5-bit value ("quintet").
    qs = [ord(v) for v in string.translate(cs, c2vtranstable)]
    numoctets = div_ceil(lengthinbits, 8)
    # NOTE(review): numquintetsofdata is computed but never used below.
    numquintetsofdata = div_ceil(lengthinbits, 5)
    # append zero quintets for padding if needed
    numquintetsneeded = div_ceil(numoctets*8, 5)
    qs.extend([0]*(numquintetsneeded-len(qs)))
    # Repack the stream of 5-bit quintets into 8-bit octets: `num` is a
    # big-integer accumulator of pending bits, `pos` the place value at
    # which the next quintet is inserted.
    octets = []
    pos = 2048
    num = qs[0] * pos
    # NOTE(review): readybits is assigned but never used below.
    readybits = 5
    i = 1
    while len(octets) < numoctets:
        # Pull in quintets until a full octet's worth of bits is present.
        while pos > 256:
            pos = pos / 32
            num = num + (qs[i] * pos)
            i = i + 1
        # Peel off the top 8 bits as the next output octet.
        octet = num / 256
        octets.append(octet)
        num = num - (octet * 256)
        num = num * 256
        pos = pos * 256
    assert len(octets) == numoctets, "len(octets): %s, numoctets: %s, octets: %s" % (len(octets), numoctets, octets,)
    res = ''.join([chr(o) for o in octets])
    # Round-trip check: cs must be the canonical encoding of the result.
    precondition(b2a_l(res, lengthinbits) == cs, "cs is required to be the canonical base-32 encoding of some data.", b2a(res), res=res, cs=cs)
    return res
def encode_file_to_streams(file_path, block_size, k, m):
    """
    break file into file stripes.

    @type file_path : C{str}
    @param file_path : abs path of file to encode.
    @type block_size : C{int}
    @param block_size : encode unit.
    @type k : C{int}
    @param k : number of necessary file shares to restore.
    @type m : C{int}
    @param m : m-k redundant file shares.
    @rtype : C{list}
    @return : m file streams, each stream is a str.
    """
    file_size = path.getsize(file_path)
    fencoder = Encoder(k, m)
    block_count = mathutil.div_ceil(file_size, block_size)
    ds = [""] * k
    results = [""] * m
    # Fix: the original opened the file and never closed it (handle leak),
    # and shadowed the `file` builtin; close it even if encoding raises.
    f = open(file_path)
    try:
        # Full rounds of k blocks each.
        for i in range(block_count / k):
            for index in range(k):
                ds[index] = f.read(block_size)
            # Only the very last read of the file can be short; within a
            # full round that can only be ds[k-1], so pad it to block_size.
            ds[k - 1] = ds[k - 1] + "\x00" * (block_size - len(ds[k - 1]))
            temp = fencoder.encode(ds)
            for j in range(m):
                results[j] = results[j] + temp[j]
        if block_count % k == 0:
            return results
        # The last, partial round: pad the short block and fill the rest
        # of the stripe with zero blocks.
        for i in range(block_count % k):
            ds[i] = f.read(block_size)
            ds[i] = ds[i] + "\x00" * (block_size - len(ds[i]))
        for index in range(i + 1, k):
            ds[index] = "\x00" * (len(ds[0]))
        temp = fencoder.encode(ds)
        for j in range(m):
            results[j] = results[j] + temp[j]
        return results
    finally:
        f.close()
def _make_new_rand_data(size, k, m):
    """
    Regenerate the module-level encoding fixtures.

    Fills the globals: d (size random bytes), ds (d split into k shares of
    div_ceil(size, k) bytes, the last zero-padded to the length of the
    second-to-last), K/M, and fresh easyfec/fec encoders.
    """
    global d, easyfecenc, fecenc, K, M
    K = k
    M = m
    d = os.urandom(size)
    del ds[:]
    ds.extend([None] * k)
    blocksize = mathutil.div_ceil(size, k)
    for i in range(k):
        ds[i] = d[i * blocksize:(i + 1) * blocksize]
    # Pad the final (possibly short) share to the second-to-last share's
    # length.  Fix: guard for k == 1, where ds[-2] does not exist and the
    # original raised IndexError; a single share is the whole of d and
    # needs no padding.  Behavior for k >= 2 is unchanged.
    if k >= 2:
        ds[-1] = ds[-1] + "\x00" * (len(ds[-2]) - len(ds[-1]))
    easyfecenc = easyfec.Encoder(k, m)
    fecenc = Encoder(k, m)
def share_size_for_data(shares_needed, datasize):
    """
    Calculate the size of a single erasure encoding share for data of the
    given size and with the given level of redundancy.

    :param int shares_needed: The number of shares (``k``) from the
        erasure encoding process which are required to reconstruct
        original data of the indicated size.

    :param int datasize: The size of the data to consider, in bytes.

    :return int: The size of a single erasure encoding share for the given
        inputs.
    """
    # Each of the k required shares carries an equal slice of the data,
    # rounded up so that k shares always cover it completely.
    single_share_size = div_ceil(datasize, shares_needed)
    return single_share_size
def test_odd_sizes(self):
    """
    Round-trip random byte strings through b2a_l/a2b_l at random bit
    lengths, including lengths that are not a multiple of 8.
    """
    for _ in range(64):
        nbits = random.randrange(1, 256)
        noctets = mathutil.div_ceil(nbits, 8)
        data = insecurerandstr(noctets)
        # zero-out unused least-sig bits of the final octet
        extra = nbits % 8
        if extra:
            shift = 8 - extra
            last = (ord(data[-1]) >> shift) << shift
            data = data[:-1] + chr(last)
        encoded = zbase62.b2a_l(data, nbits)
        # the size of the base-62 encoding must be just right
        assert len(encoded) == zbase62.num_chars_that_this_many_octets_encode_to(noctets)
        decoded = zbase62.a2b_l(encoded, nbits)
        # the size of the result must be just right
        assert len(decoded) == noctets
        assert data == decoded
def decode_files_to_file(files, size, block_size, k, m, destination_path):
    """
    decoded file stripes to file.

    @type files : C{list}
    @param files : list of abs path of file stripes, should end with '.n',
        n is an integer.
    @type size : C{int}
    @param size : file original size.
    @type block_size : C{int}
    @param block_size : processing unit.
    @type k : C{int}
    @param k : k.
    @type m : C{int}
    @param m : m.
    @type destination_path : C{str}
    @param destination_path : abs path to put decoded file to.
    """
    fdecoder = Decoder(k, m)
    # Share index is carried in each filename's extension ('.n').
    parts = [int(path.splitext(f)[1][1:]) for f in files]
    streams = [""] * k
    # Fix: the original never closed the opened share files (and leaked
    # the output file if decoding raised); close every handle on all paths.
    # Also renamed the local that shadowed the `file` builtin.
    out = open(destination_path, "w")
    try:
        file_shares = [open(f, "r") for f in files]
        try:
            file_shares_size = path.getsize(files[0]) * k
            block_count = mathutil.div_ceil(file_shares_size, block_size)
            for count in range(block_count):
                for i in range(k):
                    streams[i] = file_shares[i].read(block_size)
                results = fdecoder.decode(streams, parts)
                for i in range(len(results)):
                    out.write(results[i])
            # Drop the zero padding appended during encoding.
            out.truncate(size)
        finally:
            for fh in file_shares:
                fh.close()
    finally:
        out.close()
def fdc_file(parts, block_size, k, m, file_name, file_dir, file_origin_size):
    """
    Decode the share files './temp/<file_name>.<i>' (for i in parts) and
    write the reassembled file to file_dir.

    @param parts : indices of the available shares (filename suffixes).
    @param block_size : processing unit in bytes.
    @param k : number of shares required to reconstruct.
    @param m : total number of shares produced by the encoder.
    @param file_name : base name of the share files under './temp/'.
    @param file_dir : path of the output file to write.
    @param file_origin_size : original file size; output is truncated to it.
    """
    fdecoder = Decoder(k, m)
    streams = [""] * k
    # Fix: the original never closed the share files (and leaked the
    # output file if decoding raised); close every handle on all paths.
    # Also renamed the local that shadowed the `file` builtin.
    out = open(file_dir, 'w')
    try:
        shares = [open('./temp/' + file_name + '.' + str(i), 'r') for i in parts]
        try:
            file_size = path.getsize('./temp/' + file_name + '.' + str(parts[0])) * k
            block_count = mathutil.div_ceil(file_size, block_size)
            for count in range(block_count):
                for i in range(k):
                    streams[i] = shares[i].read(block_size)
                results = fdecoder.decode(streams, parts)
                for i in range(len(results)):
                    out.write(results[i])
            # Drop the zero padding appended during encoding.
            out.truncate(file_origin_size)
        finally:
            for fh in shares:
                fh.close()
    finally:
        out.close()
def fec_file(file, block_size, k, m):
    """
    Erasure-encode the open file object into m share strings.

    Reads the file k blocks at a time, zero-padding short blocks, and
    feeds each group of k blocks to the encoder; the j-th returned string
    is the concatenation of the j-th share of every group.
    """
    total_bytes = path.getsize(file.name)
    fencoder = Encoder(k, m)
    block_count = mathutil.div_ceil(total_bytes, block_size)
    group = [""] * k
    shares = [""] * m

    def _accumulate(encoded):
        # Append this group's shares onto the running output strings.
        for j in range(m):
            shares[j] = shares[j] + encoded[j]

    # Full groups of k blocks.
    for _ in range(block_count // k):
        for slot in range(k):
            group[slot] = file.read(block_size)
        # Only the final read of the file can be short; within a full
        # group that is necessarily the k-th block, so pad it out.
        group[k - 1] = group[k - 1] + "\x00" * (block_size - len(group[k - 1]))
        _accumulate(fencoder.encode(group))

    leftover = block_count % k
    if leftover == 0:
        return shares

    # Final, partial group: pad the short block, zero-fill the rest.
    for slot in range(leftover):
        group[slot] = file.read(block_size)
        group[slot] = group[slot] + "\x00" * (block_size - len(group[slot]))
    for slot in range(leftover, k):
        group[slot] = "\x00" * (len(group[0]))
    _accumulate(fencoder.encode(group))
    return shares
def b2a_l(os, lengthinbits):
    """
    @param os the data to be encoded (a string)
    @param lengthinbits the number of bits of data in os to be encoded

    b2a_l() will generate a base-32 encoded string big enough to encode
    lengthinbits bits.  So for example if os is 2 bytes long and
    lengthinbits is 15, then b2a_l() will generate a 3-character- long
    base-32 encoded string (since 3 quintets is sufficient to encode 15
    bits).  If os is 2 bytes long and lengthinbits is 16 (or None), then
    b2a_l() will generate a 4-character string.  Note that if os is 2
    bytes long and lengthinbits is 15, then the least-significant bit of
    os is ignored.

    Warning: if you generate a base-32 encoded string with b2a_l(), and
    then someone else tries to decode it by calling a2b() instead of
    a2b_l(), then they will (probably) get a different string than the one
    you encoded!  So only use b2a_l() when you are sure that the encoding
    and decoding sides know exactly which lengthinbits to use.  If you do
    not have a way for the encoder and the decoder to agree upon the
    lengthinbits, then it is best to use b2a() and a2b().  The only
    drawback to using b2a() over b2a_l() is that when you have a number of
    bits to encode that is not a multiple of 8, b2a() can sometimes
    generate a base-32 encoded string that is one or two characters longer
    than necessary.

    @return the contents of os in base-32 encoded form
    """
    precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits)
    precondition(div_ceil(lengthinbits, 8) == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os))
    # precondition((lengthinbits % 8==0) or ((ord(os[-1]) % (2**(8-(lengthinbits%8))))==0), "Any unused least-significant bits in os are required to be zero bits.", ord(os[-1]), lengthinbits=lengthinbits) # removing this precondition, because I like to use it with random os, like this: base32.b2a_l(file("/dev/urandom", "r").read(9), 65)

    # NOTE(review): the parameter `os` shadows the `os` module name; left
    # as-is since it is part of the documented signature.
    os = [ord(o) for o in os]

    numquintets = div_ceil(lengthinbits, 5)
    numoctetsofdata = div_ceil(lengthinbits, 8)
    # zero out any unused bits in the final octet
    if lengthinbits % 8 != 0:
        os[-1] >>= (8 - (lengthinbits % 8))
        os[-1] <<= (8 - (lengthinbits % 8))
    # append zero octets for padding if needed
    numoctetsneeded = div_ceil(numquintets * 5, 8) + 1
    os.extend([0] * (numoctetsneeded - len(os)))

    # Repack the octets into 5-bit quintets: `num` is a big-integer
    # accumulator of pending bits; `cutoff` is the place value used to
    # peel quintets off the top.
    quintets = []
    cutoff = 256
    num = os[0]
    i = 0
    while len(quintets) < numquintets:
        i = i + 1
        assert len(
            os
        ) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (
            len(os),
            i,
            len(quintets),
            numquintets,
            lengthinbits,
            numoctetsofdata,
            numoctetsneeded,
            os,
        )
        # Shift the next octet into the accumulator.
        num = num * 256
        num = num + os[i]
        # Every 5th octet the cutoff cycle bottoms out at 1; restart the
        # cycle and absorb another octet before extracting quintets.
        if cutoff == 1:
            cutoff = 256
            continue
        cutoff = cutoff * 8
        # Each absorbed octet yields two 5-bit quintets.
        quintet = num / cutoff
        quintets.append(quintet)
        num = num - (quintet * cutoff)
        cutoff = cutoff / 32
        quintet = num / cutoff
        quintets.append(quintet)
        num = num - (quintet * cutoff)
    # The loop can overshoot by exactly one quintet; trim it.
    if len(quintets) > numquintets:
        assert len(quintets) == (
            numquintets + 1), "len(quintets): %s, numquintets: %s, quintets: %s" % (
                len(quintets), numquintets,
                quintets,
            )
        quintets = quintets[:numquintets]
    # Translate quintet values into base-32 characters.
    res = string.translate(''.join([chr(q) for q in quintets]), v2ctranstable)
    assert could_be_base32_encoded_l(
        res, lengthinbits), "lengthinbits: %s, res: %s" % (
            lengthinbits,
            res,
        )
    return res
def b2a_l(os, lengthinbits):
    """
    @param os the data to be encoded (a string)
    @param lengthinbits the number of bits of data in os to be encoded

    b2a_l() will generate a base-32 encoded string big enough to encode
    lengthinbits bits.  So for example if os is 2 bytes long and
    lengthinbits is 15, then b2a_l() will generate a 3-character- long
    base-32 encoded string (since 3 quintets is sufficient to encode 15
    bits).  If os is 2 bytes long and lengthinbits is 16 (or None), then
    b2a_l() will generate a 4-character string.  Note that if os is 2
    bytes long and lengthinbits is 15, then the least-significant bit of
    os is ignored.

    Warning: if you generate a base-32 encoded string with b2a_l(), and
    then someone else tries to decode it by calling a2b() instead of
    a2b_l(), then they will (probably) get a different string than the one
    you encoded!  So only use b2a_l() when you are sure that the encoding
    and decoding sides know exactly which lengthinbits to use.  If you do
    not have a way for the encoder and the decoder to agree upon the
    lengthinbits, then it is best to use b2a() and a2b().  The only
    drawback to using b2a() over b2a_l() is that when you have a number of
    bits to encode that is not a multiple of 8, b2a() can sometimes
    generate a base-32 encoded string that is one or two characters longer
    than necessary.

    @return the contents of os in base-32 encoded form
    """
    precondition(isinstance(lengthinbits, (int, long,)), "lengthinbits is required to be an integer.", lengthinbits=lengthinbits)
    precondition(div_ceil(lengthinbits, 8) == len(os), "lengthinbits is required to specify a number of bits storable in exactly len(os) octets.", lengthinbits=lengthinbits, lenos=len(os))
    # precondition((lengthinbits % 8==0) or ((ord(os[-1]) % (2**(8-(lengthinbits%8))))==0), "Any unused least-significant bits in os are required to be zero bits.", ord(os[-1]), lengthinbits=lengthinbits) # removing this precondition, because I like to use it with random os, like this: base32.b2a_l(file("/dev/urandom", "r").read(9), 65)

    # NOTE(review): the parameter `os` shadows the `os` module name; left
    # as-is since it is part of the documented signature.
    os = [ord(o) for o in os]

    numquintets = div_ceil(lengthinbits, 5)
    numoctetsofdata = div_ceil(lengthinbits, 8)
    # zero out any unused bits in the final octet
    if lengthinbits % 8 != 0:
        os[-1] >>= (8-(lengthinbits % 8))
        os[-1] <<= (8-(lengthinbits % 8))
    # append zero octets for padding if needed
    numoctetsneeded = div_ceil(numquintets*5, 8) + 1
    os.extend([0]*(numoctetsneeded-len(os)))

    # Repack the octets into 5-bit quintets: `num` accumulates pending
    # bits as a big integer; `cutoff` is the place value used to peel
    # quintets off the top of the accumulator.
    quintets = []
    cutoff = 256
    num = os[0]
    i = 0
    while len(quintets) < numquintets:
        i = i + 1
        assert len(os) > i, "len(os): %s, i: %s, len(quintets): %s, numquintets: %s, lengthinbits: %s, numoctetsofdata: %s, numoctetsneeded: %s, os: %s" % (len(os), i, len(quintets), numquintets, lengthinbits, numoctetsofdata, numoctetsneeded, os,)
        # Shift the next octet into the accumulator.
        num = num * 256
        num = num + os[i]
        # Every 5th octet the cutoff cycle bottoms out at 1; restart the
        # cycle and absorb another octet before extracting quintets.
        if cutoff == 1:
            cutoff = 256
            continue
        cutoff = cutoff * 8
        # Each absorbed octet yields two 5-bit quintets.
        quintet = num / cutoff
        quintets.append(quintet)
        num = num - (quintet * cutoff)
        cutoff = cutoff / 32
        quintet = num / cutoff
        quintets.append(quintet)
        num = num - (quintet * cutoff)
    # The loop can overshoot by exactly one quintet; trim it.
    if len(quintets) > numquintets:
        assert len(quintets) == (numquintets+1), "len(quintets): %s, numquintets: %s, quintets: %s" % (len(quintets), numquintets, quintets,)
        quintets = quintets[:numquintets]
    # Translate quintet values into base-32 characters.
    res = string.translate(''.join([chr(q) for q in quintets]), v2ctranstable)
    assert could_be_base32_encoded_l(res, lengthinbits), "lengthinbits: %s, res: %s" % (lengthinbits, res,)
    return res