print 'max comp:', maxcomp print 'input avgsize:', line_avg print 'input steps:', math.log(len(lines), 2) print 'target size:', target print steps = [0] cac = {} def compo(i): output = zlib.compress(''.join(lines[:i])) cac[i] = output print [i, len(output) - target] steps[0] += 1 return len(output) - target t0 = time.time() maxlines, actual = sortedfile.bisect_func_right(0, 1, len(lines), compo) maxlines -= 1 output = cac[maxlines] print print 'time taken:', 1000 * (time.time() - t0) print 'output steps:', steps print 'maxlines:', maxlines print 'maxlines len:', sum(map(len, lines[:maxlines])) print 'compression ratio:', sum(map(len, lines[:maxlines])) / float(len(output)) print 'outlen:', len(output)
print 'input steps:', math.log(len(lines), 2) print 'target size:', target print steps = [0] cac = {} def compo(i): output = zlib.compress(''.join(lines[:i])) cac[i] = output print[i, len(output) - target] steps[0] += 1 return len(output) - target t0 = time.time() maxlines, actual = sortedfile.bisect_func_right(0, 1, len(lines), compo) maxlines -= 1 output = cac[maxlines] print print 'time taken:', 1000 * (time.time() - t0) print 'output steps:', steps print 'maxlines:', maxlines print 'maxlines len:', sum(map(len, lines[:maxlines])) print 'compression ratio:', sum(map(len, lines[:maxlines])) / float( len(output)) print 'outlen:', len(output)
steps = [0] def compo(i): dat = zlib.compress(''.join(lines[:i])) actual = len(dat) ratio = actual/float(target) out = (ratio, dat) print[i, actual, ratio] steps[0]+=1 return out t0 = time.time() doink = sortedfile.bisect_func_right((1.0,), 1, len(lines), compo) maxlines, (ratio, output) = doink if maxlines and (ratio > 1.0): maxlines -= 1 ratio, output = compo(maxlines) print print 'time taken:', 1000 * (time.time() - t0) print 'output steps:', steps print 'maxlines:', maxlines print 'maxlines len:', sum(map(len, lines[:maxlines])) print 'target ratio:', ratio print 'outlen:', len(output) #print compo(50, (15000,))
steps = [0] def compo(i): dat = zlib.compress(''.join(lines[:i])) actual = len(dat) ratio = actual / float(target) out = (ratio, dat) print[i, actual, ratio] steps[0] += 1 return out t0 = time.time() doink = sortedfile.bisect_func_right((1.0, ), 1, len(lines), compo) maxlines, (ratio, output) = doink if maxlines and (ratio > 1.0): maxlines -= 1 ratio, output = compo(maxlines) print print 'time taken:', 1000 * (time.time() - t0) print 'output steps:', steps print 'maxlines:', maxlines print 'maxlines len:', sum(map(len, lines[:maxlines])) print 'target ratio:', ratio print 'outlen:', len(output) #print compo(50, (15000,))