def writeHuffTable2(huffTable, f):
    """Write a Huffman table whose keys are nucleotide strings to f.

    Everything is emitted as strings of '0'/'1' characters through
    bits.stringToBitsOut, in this order:

      1. the number of entries as a 32-digit zero-padded binary number;
      2. for every entry: the encoded key, the codeword length as an
         8-digit zero-padded binary number, then the codeword itself.

    A key is encoded character by character (A=00, C=01, G=100, T=101,
    N=110) and closed with the terminator 111. Characters other than
    A/C/G/T/N contribute no bits.

    Entries whose codeword length does not fit in 8 binary digits are
    reported on stderr and left out. They are filtered *before* the header
    is written, so the entry count in the header always equals the number
    of entries that actually follow it.

    huffTable -- mapping from key string to codeword. Codewords are handed
                 to bits.stringToBitsOut unchanged (presumably '0'/'1'
                 strings; confirm against the code that builds the table).
    f         -- output handle, passed through to the bits helpers.
    """
    baseCodes = {'A': '00', 'C': '01', 'G': '100', 'T': '101', 'N': '110'}
    keyTerminator = '111'

    # Encode and validate every entry up front; nothing is written until we
    # know exactly how many entries will be emitted.
    entries = []
    for myKey in huffTable:
        myCodeword = huffTable[myKey]
        myCodeLen = bin(len(myCodeword))[2:]

        # 8 bits is not enough for the length of the codeword
        if len(myCodeLen) > 8:
            sys.stderr.write("Codeword length is too long!\n")
            continue

        # Unknown characters map to '' and are therefore dropped from the key.
        # NOTE(review): two keys that differ only in such characters encode
        # identically; confirm keys are restricted to A/C/G/T/N.
        keyOut = ''.join([baseCodes.get(base, '') for base in myKey])
        keyOut += keyTerminator

        entries.append((keyOut, myCodeLen.zfill(8), myCodeword))

    # Header: number of entries that will really be written
    numEntriesOut = bin(len(entries))[2:]
    if len(numEntriesOut) > 32:
        # Reported only; the over-wide value is still written below.
        sys.stderr.write("Number of entries is too long!\n")
    else:
        numEntriesOut = numEntriesOut.zfill(32)

    # (remainder, numLeft) is the state that bits.stringToBitsOut returns
    # and expects back on the next call.
    remainder = 0
    numLeft = 0
    (remainder, numLeft) = bits.stringToBitsOut(numEntriesOut, f, remainder, numLeft)

    for (keyOut, myCodeLen, myCodeword) in entries:
        # Write out the key
        (remainder, numLeft) = bits.stringToBitsOut(keyOut, f, remainder, numLeft)
        # Write out the length of the codeword
        (remainder, numLeft) = bits.stringToBitsOut(myCodeLen, f, remainder, numLeft)
        # Write out the codeword
        (remainder, numLeft) = bits.stringToBitsOut(myCodeword, f, remainder, numLeft)

    bits.flushBitsOutput(f, remainder, numLeft)
def writeHuffTable(huffTable, f):
    """Write a Huffman table whose keys are non-negative integers to f.

    Everything is emitted as strings of '0'/'1' characters through
    bits.stringToBitsOut, in this order:

      1. the number of entries as a 32-digit zero-padded binary number;
      2. for every entry: int(key) as a 32-digit zero-padded binary number,
         the codeword length as an 8-digit zero-padded binary number, then
         the codeword itself.

    An entry is reported on stderr and left out when its key is negative,
    when its key needs more than 32 binary digits, or when its codeword
    length needs more than 8 binary digits. Filtering happens *before* the
    header is written, so the entry count in the header always equals the
    number of entries that actually follow it.

    huffTable -- mapping from key (anything int() accepts) to codeword.
                 Codewords are handed to bits.stringToBitsOut unchanged
                 (presumably '0'/'1' strings; confirm against the code that
                 builds the table).
    f         -- output handle, passed through to the bits helpers.
    """
    # Encode and validate every entry up front; nothing is written until we
    # know exactly how many entries will be emitted.
    entries = []
    for myKey in huffTable:
        keyVal = int(myKey)
        myCodeword = huffTable[myKey]
        myCodeLen = bin(len(myCodeword))[2:]

        # bin() of a negative number starts with '-0b', so slicing off two
        # characters would leave a stray 'b' in the bit string.
        if keyVal < 0:
            sys.stderr.write("Key is negative!\n")
            continue
        keyOut = bin(keyVal)[2:]

        # 32 bits is not enough for the key
        if len(keyOut) > 32:
            sys.stderr.write("Key is too long!\n")
            continue

        # 8 bits is not enough for the length of the codeword
        if len(myCodeLen) > 8:
            sys.stderr.write("Codeword length is too long!\n")
            continue

        entries.append((keyOut.zfill(32), myCodeLen.zfill(8), myCodeword))

    # Header: number of entries that will really be written
    numEntriesOut = bin(len(entries))[2:]
    if len(numEntriesOut) > 32:
        # Reported only; the over-wide value is still written below.
        sys.stderr.write("Number of entries is too long!\n")
    else:
        numEntriesOut = numEntriesOut.zfill(32)

    # (remainder, numLeft) is the state that bits.stringToBitsOut returns
    # and expects back on the next call.
    remainder = 0
    numLeft = 0
    (remainder, numLeft) = bits.stringToBitsOut(numEntriesOut, f, remainder, numLeft)

    for (keyOut, myCodeLen, myCodeword) in entries:
        # Write out the key
        (remainder, numLeft) = bits.stringToBitsOut(keyOut, f, remainder, numLeft)
        # Write out the length of the codeword
        (remainder, numLeft) = bits.stringToBitsOut(myCodeLen, f, remainder, numLeft)
        # Write out the codeword
        (remainder, numLeft) = bits.stringToBitsOut(myCodeword, f, remainder, numLeft)

    bits.flushBitsOutput(f, remainder, numLeft)
def _golombHeaderField(value, width, tooLargeMsg):
    """Return value as a binary string left-padded with zeros to width digits.

    When value needs more than width digits, tooLargeMsg is written to
    stderr and the unpadded string is returned; the caller still writes it.
    NOTE(review): an over-wide field breaks the fixed-width header layout;
    consider failing hard here instead.
    """
    digits = bin(value)[2:]
    if len(digits) > width:
        sys.stderr.write(tooLargeMsg + "\n")
        return digits
    return digits.zfill(width)


def writeGolombCodedHuffTable(huffTable, f):
    """Write an integer-keyed Huffman table to f with Golomb-coded keys.

    Entries are sorted by int(key). The leading run of consecutive keys
    (the "dense" region) is stored without keys; every later entry (the
    "sparse" region) stores its gap to the previous key, minus one, as a
    unary quotient followed by a fixed-width remainder with divisor M.

    Stream layout, all passed to bits.stringToBitsOut as '0'/'1' strings.
    The field widths are the module-level names tableSize, denseSize,
    countSize, MSize and lenSize:

      numEntries  (tableSize digits)  entries that follow
      sparseStart (denseSize digits)  length of the dense run
      count       (countSize digits)  smallest key, i.e. start of the run
      M           (MSize digits)      Golomb divisor, at least 1
      per entry:  [Golomb key, sparse region only]
                  codeword length (lenSize digits), codeword

    Handling of awkward inputs:
      * Entries whose codeword length does not fit in lenSize digits are
        reported on stderr and dropped *before* any layout is computed, so
        the header, the dense run and the deltas all describe exactly the
        entries that are written.
      * An empty table, or one whose keys are all consecutive, has no
        sparse deltas; M is then written as 1 and no Golomb keys follow.
      * M is clamped to 1 when the mean delta rounds down to 0.
      * A negative key cannot be represented in the count field and raises
        ValueError before anything is written.

    huffTable -- mapping from key (anything int() accepts) to codeword.
                 Codewords are handed to bits.stringToBitsOut unchanged
                 (presumably '0'/'1' strings; confirm against the code that
                 builds the table).
    f         -- output handle, passed through to the bits helpers.
    """
    # Keep only the entries that can be written, then sort them by key.
    sortedKeyValueList = []
    for (key, val) in huffTable.items():
        if len(bin(len(val))[2:]) > lenSize:
            sys.stderr.write("Codeword length is too long!\n")
            continue
        sortedKeyValueList.append((int(key), val))
    sortedKeyValueList.sort()
    numEntries = len(sortedKeyValueList)

    if numEntries > 0 and sortedKeyValueList[0][0] < 0:
        raise ValueError("Huffman table keys must be non-negative integers")

    # count is the smallest key; sparseStart is how many keys, starting
    # there, are consecutive (count, count+1, ...). The bound check keeps
    # the scan inside the list when every key is consecutive.
    count = 0
    sparseStart = 0
    if numEntries > 0:
        count = sortedKeyValueList[0][0]
        sparseStart = 1
        while (sparseStart < numEntries and
               sortedKeyValueList[sparseStart][0] == count + sparseStart):
            sparseStart += 1

    # Gap to the previous key, minus one, for every entry in the sparse region
    sparseDeltas = [sortedKeyValueList[i][0] - sortedKeyValueList[i - 1][0] - 1
                    for i in range(sparseStart, numEntries)]

    # M parameter in Golomb coding: integer mean of the deltas, never below 1
    # (a divisor of 0 can be neither divided by nor passed to math.log).
    if sparseDeltas:
        M = max(1, sum(sparseDeltas) // len(sparseDeltas))
    else:
        M = 1

    # (remainder, numLeft) is the state that bits.stringToBitsOut returns
    # and expects back on the next call.
    remainder = 0
    numLeft = 0

    # Header, in the order: numEntries, sparseStart, count, M
    numEntriesOut = _golombHeaderField(numEntries, tableSize,
                                       "Number of entries is too long!")
    (remainder, numLeft) = bits.stringToBitsOut(numEntriesOut, f, remainder, numLeft)

    sparseStartOut = _golombHeaderField(sparseStart, denseSize,
                                        "Start of sparse integers too large!")
    (remainder, numLeft) = bits.stringToBitsOut(sparseStartOut, f, remainder, numLeft)

    countOut = _golombHeaderField(count, countSize, "count value too large!")
    (remainder, numLeft) = bits.stringToBitsOut(countOut, f, remainder, numLeft)

    MOut = _golombHeaderField(M, MSize, "M value too large!")
    (remainder, numLeft) = bits.stringToBitsOut(MOut, f, remainder, numLeft)

    # M also tells us the length (in bits) of our remainder part.
    # The formula is kept exactly as it was: a reader presumably derives the
    # same width from M the same way (verify against the table reader).
    MLen = int(math.ceil(math.log(M, 2)))

    # Loop through the sorted key/value list
    for (ind, (key, codeword)) in enumerate(sortedKeyValueList):
        # Write out the key if we're in the sparse region
        if ind >= sparseStart:
            delta = sparseDeltas[ind - sparseStart]
            quo = delta // M
            rem = delta % M

            # Unary quotient, then the remainder padded to MLen digits.
            # NOTE(review): bin() always yields at least one digit, so for
            # M == 1 (MLen == 0) a single '0' remainder digit is still
            # emitted; kept unchanged for format compatibility -- confirm
            # the reader consumes it.
            unaryPart = '1' * quo + '0'
            huffPart = bin(rem)[2:].zfill(MLen)
            keyOut = unaryPart + huffPart

            (remainder, numLeft) = bits.stringToBitsOut(keyOut, f, remainder, numLeft)

        # Codeword length (already known to fit in lenSize digits), then
        # the codeword itself
        codeLen = bin(len(codeword))[2:].zfill(lenSize)
        (remainder, numLeft) = bits.stringToBitsOut(codeLen, f, remainder, numLeft)
        (remainder, numLeft) = bits.stringToBitsOut(codeword, f, remainder, numLeft)

    bits.flushBitsOutput(f, remainder, numLeft)