示例#1
0
文件: data.py 项目: kushs123/pynam
        def ncr(n, r):
            """Return "n choose r", saturating at 0x7FFFFFFF for large results.

            Returns 0 if either argument is negative or n < r, and 1 if
            r == 0 or n == r. Otherwise the result comes from ``entropy.ncr``
            (for r < 100) or from exponentiating ``entropy.lnncrr`` (for
            r >= 100), unless one of the logarithmic checks below decides the
            value is large enough to report the saturation value instead.

            Reads ``parent`` from the enclosing scope: the cheap lower-bound
            shortcut is only taken when ``parent`` is None or ``parent.total``
            has itself reached the saturation value.
            """
            # Degenerate arguments: nothing to choose from, or only one way.
            if n < 0 or r < 0 or n < r:
                return 0
            if r == 0 or n == r:
                return 1

            saturated = 0x7FFFFFFF
            # NOTE(review): 9.33 is close to log10(2**31), yet it is compared
            # against natural logarithms below (math.log, and lnncrr whose
            # result is fed to math.exp) -- confirm the threshold is intended.
            log_limit = 9.33

            # When the parent node already offers at least `saturated`
            # choices an exact count is not needed, so the lower bound
            # nCr >= (n/r)^r is allowed to short-circuit the computation.
            parent_total = parent.total if parent is not None else saturated
            if (parent_total >= saturated
                    and r * math.log(n / float(r)) > log_limit):
                return saturated

            # Large r: go through the logarithmic form (faster for large r).
            if r >= 100:
                log_ncr = entropy.lnncrr(n, r)
                if log_ncr > log_limit:
                    return saturated
                return int(round(math.exp(log_ncr)))

            # Small r: defer to the direct implementation.
            return entropy.ncr(n, r)
# Write the figure currently bound to `fig` (presumably created earlier in the
# script, outside this excerpt) to a PDF file, using a tight bounding box.
fig.savefig("out/sketch_info.pdf", format='pdf', bbox_inches='tight')

# Plot the information per sample together with the false-positive counts
# (mean, min and max) over xs, add two annotated horizontal reference lines,
# and save the result as a PDF.
print("Plotting information per sample...")
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(1, 1, 1)

# Curves over xs. Only the first two carry a legend label; the min/max
# curves are drawn as thin dotted lines in the same colour as the mean.
ax.plot(xs, info / xs, label="Information per sample", lw=0.75, color="k")
ax.plot(xs, n_false_positives_mean, '--',
        label="False positives per sample", lw=0.75, color="#3465a4")
ax.plot(xs, n_false_positives_min, ':', lw=0.25, color="#3465a4")
ax.plot(xs, n_false_positives_max, ':', lw=0.25, color="#3465a4")

# Horizontal reference line at n_bits - n_ones, labelled near its right end.
# The annotation text is passed positionally: the keyword was renamed from
# `s` to `text` in matplotlib 3.3, so the positional form is the one that
# works on both older and newer releases.
ax.plot([0, n_samples], [n_bits - n_ones, n_bits - n_ones], '--',
        lw=0.5, color="#3465a4")
ax.annotate("\\textit{Maximum false positives}",
            xy=(n_samples * 0.975, n_bits - n_ones),
            verticalalignment="bottom", horizontalalignment="right",
            fontsize=8.0)

# Horizontal reference line at lnncrr(n_bits, n_ones) converted to bits
# (division by ln 2).
mInfo = entropy.lnncrr(n_bits, n_ones) / math.log(2.0)
ax.plot([0, n_samples], [mInfo, mInfo], '-', lw=0.5, color="k")
ax.annotate("\\textit{Maximum information}",
            xy=(n_samples * 0.975, mInfo),
            verticalalignment="bottom", horizontalalignment="right",
            fontsize=8.0)

# Axis cosmetics; the legend is anchored above the axes in two columns.
ax.set_xlim(1, n_samples)
ax.set_xlabel("Sample count $N$")
ax.set_ylabel("Bits")
ax.legend(loc='lower center', bbox_to_anchor=(0.5, 1.05), ncol=2)
fig.savefig("out/sketch_info_per_sample.pdf", format='pdf',
            bbox_inches='tight')

# Start a fresh figure for the error plot and draw the mean false-positive
# curve over xs. The rest of this figure's setup is presumably further down
# the script, outside this excerpt.
print("Plotting errors...")
fig = plt.figure(figsize=figsize)
ax = fig.add_subplot(1, 1, 1)
ax.plot(xs, n_false_positives_mean, label="False positives", color="#3465a4")