def actual_table(output=True):
    """
    Produce sample table to use for curve fitting.

    Fits the linear, quadratic and N log N models to three hard-coded sample
    points, then builds a DataTable with one row per sample containing N, the
    actual value and the value predicted by the fitted linear model.

    The quadratic and N log N fits are only reported (printed); the table
    itself uses the linear coefficients alone.

    When the module-level flag ``numpy_error`` is set, no fitting is done and
    the linear coefficients are both zero. (Presumably the flag records that
    numpy/scipy could not be imported -- confirm where it is defined.)

    :param output: when True, print each fitted model and the correlation
        between the 'Actual' and 'Model' columns. Also forwarded to DataTable.
    :return: the populated DataTable.
    """
    # Sample data
    xvals = [100, 1000, 10000]
    yvals = [0.063, 0.565, 5.946]

    if numpy_error:
        a, b = 0, 0
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        # Convert once; the same arrays feed all three fits.
        xs = np.array(xvals)
        ys = np.array(yvals)

        # curve_fit returns (coefficients, covariance); only the coefficients
        # are of interest here.
        (a, b), _ = curve_fit(linear_model, xs, ys)
        if output:
            print('Linear = {}*N + {}'.format(a, b))

        (qa, qb), _ = curve_fit(quadratic_model, xs, ys)
        if output:
            print('Quadratic = {}*N*N + {}*N'.format(qa, qb))

        # The trailing comma makes this a one-element unpack, so `na` is the
        # coefficient itself rather than a length-1 array (which would be
        # printed with surrounding brackets).
        (na,), _ = curve_fit(n_log_n_model, xs, ys)
        if output:
            print('N Log N = {}*N*log N'.format(na))

    tbl = DataTable([8, 8, 8], ['N', 'Actual', 'Model'], output=output)
    for n, actual in zip(xvals, yvals):
        tbl.row([n, actual, linear_model(n, a, b)])

    # Respect output=False: stay silent, like every other print in here.
    if output:
        print(tbl.pearsonr('Actual', 'Model'))
    return tbl
def _sort_time_ms(n, repeat=100, number=100):
    """
    Time ``x.sort()`` on a shuffled list of the integers 0..n-1.

    Returns 1000 times the smallest of the ``repeat`` timings reported by
    ``timeit.repeat``, i.e. the best time in milliseconds for a batch of
    ``number`` consecutive ``x.sort()`` calls.
    """
    # NOTE(review): timeit runs `setup` once per timing, not once per call, so
    # within each batch only the first x.sort() sees shuffled data -- confirm
    # this is the intended measurement.
    setup = '\n'.join([
        '',
        'import random',
        'x=list(range({}))'.format(n),
        'random.shuffle(x)',
    ])
    timings = timeit.repeat(stmt='x.sort()', setup=setup,
                            repeat=repeat, number=number)
    return 1000 * min(timings)


def prototype_table(output=True, decimals=3):
    """
    Generate table of results for prototype application.

    The prototype application is simply a request to sort the N values.

    Sorting is timed for N = 100, 1000 and 10000, and linear, quadratic and
    N log N models are fitted to those three timings. The resulting table has
    one row for each fitted size plus one for each of the larger sizes
    100000, 1000000 and 10000000 (timed but not used for fitting), showing
    the measured time next to each model's prediction.

    When the module-level flag ``numpy_error`` is set, no fitting is done and
    every coefficient is zero.

    :param output: when True, print the fitted models, the correlation of each
        model column with 'Time', and the table's best model. Also forwarded
        to DataTable.
    :param decimals: forwarded to DataTable.
    :return: the populated DataTable.
    """
    fit_sizes = [100, 1000, 10000]
    larger_sizes = [100000, 1000000, 10000000]

    nvals = list(fit_sizes)
    yvals = [_sort_time_ms(n) for n in fit_sizes]

    def quad_model(n, a, b):
        if a < 0:    # attempt to PREVENT negative coefficient.
            return 1e10
        return a * n * n + b * n

    # Coefficients are returned as first argument
    if numpy_error:
        nlog_n_coeffs = linear_coeffs = quadratic_coeffs = [0, 0]
    else:
        import numpy as np
        from scipy.optimize import curve_fit

        # Convert once; the same arrays feed all three fits.
        xs = np.array(nvals)
        ys = np.array(yvals)

        nlog_n_coeffs, _ = curve_fit(n_log_n_model, xs, ys)
        linear_coeffs, _ = curve_fit(linear_model, xs, ys)
        quadratic_coeffs, _ = curve_fit(quad_model, xs, ys)

    if output:
        print('Linear = {:f}*N + {:f}'.format(linear_coeffs[0], linear_coeffs[1]))
        print('Quadratic = {}*N*N + {}*N'.format(quadratic_coeffs[0], quadratic_coeffs[1]))
        print('N Log N = {:.12f}*N*log2(N)'.format(nlog_n_coeffs[0]))
        print()

    tbl = DataTable([12, 10, 10, 10, 10],
                    ['N', 'Time', 'Linear', 'Quad', 'NLogN'],
                    output=output, decimals=decimals)

    def add_row(n, elapsed):
        """Append one row: N, measured time, and the three model predictions."""
        tbl.row([
            n,
            elapsed,
            linear_model(n, linear_coeffs[0], linear_coeffs[1]),
            quadratic_model(n, quadratic_coeffs[0], quadratic_coeffs[1]),
            n_log_n_model(n, nlog_n_coeffs[0]),
        ])

    # Rows for the sizes the models were fitted on.
    for n, elapsed in zip(nvals, yvals):
        add_row(n, elapsed)

    # Rows for larger sizes, to see how well each model extrapolates.
    for n in larger_sizes:
        add_row(n, _sort_time_ms(n))

    if output:
        print('Linear', tbl.pearsonr('Time', 'Linear'))
        print('Quad', tbl.pearsonr('Time', 'Quad'))
        print('NLogN', tbl.pearsonr('Time', 'NLogN'))
        print(tbl.best_model('Time'))
    return tbl