def simple_gef_load_experiment(verbose=True):
    '''Run a small greedy kernel search on the simple GEF load data.

    Starts from one squared-exponential kernel per input dimension (D=2).
    For a fixed number of rounds it scores candidates with
    structure_search.try_expanded_kernels, prints every result gathered so
    far (largest BIC first), and reseeds the next round with the k new
    results that have the smallest BIC.

    verbose is forwarded to structure_search.try_expanded_kernels.
    '''
    seed_kernels = [fk.MaskKernel(2, dim, fk.SqExpKernel(0, 0))
                    for dim in (0, 1)]

    X, y = load_simple_gef_load()
    # Subsample: only the leading rows are used.
    X, y = X[0:99, :], y[0:99, :]

    max_depth = 5
    k = 2  # Expand k best

    # Result tuples are laid out as (kernel, nll, BIC); ranking uses BIC.
    nll_key, BIC_key = 1, 2
    active_key = BIC_key

    def score(entry):
        return entry[active_key]

    results = []
    for _ in range(max_depth):
        new_results = structure_search.try_expanded_kernels(
            X, y, D=2, seed_kernels=seed_kernels, verbose=verbose)
        results = results + new_results

        print('')
        results = sorted(results, key=score, reverse=True)
        for kernel, nll, BIC in results:
            print('%s %s %s' % (nll, BIC, kernel.pretty_print()))

        # Ascending sort, so the first k entries have the smallest score.
        best_new = sorted(new_results, key=score)[0:k]
        seed_kernels = [entry[0] for entry in best_new]
def expand_test2():
    '''Print all expansions of a two-term sum kernel, raw and de-duplicated.

    Each expansion is printed next to its canonical form, then the list is
    printed again after grammar.remove_duplicates, followed by the counts
    before and after duplicate removal.
    '''
    se_term = fk.MaskKernel(2, 0, fk.SqExpKernel(1, 1))
    periodic_term = fk.MaskKernel(2, 1, fk.SqExpPeriodicKernel(2, 2, 2))
    sum_kernel = fk.SumKernel([se_term, periodic_term])
    multi_d = grammar.MultiDGrammar(2)

    print('')
    for expanded in grammar.expand(sum_kernel, multi_d):
        print(expanded.pretty_print())
        print(grammar.canonical(expanded).pretty_print())
        print('')

    print(' ***** duplicates removed *****')
    print('')

    kernels = grammar.expand(sum_kernel, multi_d)
    for unique in grammar.remove_duplicates(kernels):
        print(unique.pretty_print())
        print('')

    n_before = len(kernels)
    n_after = len(grammar.remove_duplicates(kernels))
    print('%d originally, %d without duplicates' % (n_before, n_after))

    print('expand_test complete')
def plot_gef_load_Z01_smooth_2d_mean():
    '''Compute a posterior mean over a grid of column-9 values and save it.

    The first rows of the GEF load inputs are tiled once per grid value,
    column 9 of each copy is overwritten with that grid value, and
    gpml.posterior_mean is evaluated at those inputs using the full kernel
    together with its kernel_2 component.  The result is written to
    'temp_data.mat' under the keys X, Y, Z and post_mean.
    '''
    X, y, D = fear_load_mat('../data/gef_load_full_Xy.mat', 1)

    # Factories give every composite kernel its own fresh factor objects.
    def rq_outer():
        return fk.MaskKernel(D, 0, fk.RQKernel(0.268353, -0.104149, -2.105742))

    def se_dim9():
        return fk.MaskKernel(D, 9, fk.SqExpKernel(1.160242, 0.004344))

    def periodic_dim0():
        return fk.MaskKernel(
            D, 0, fk.SqExpPeriodicKernel(-0.823413, 0.000198, -0.917064))

    def rq_inner():
        return fk.MaskKernel(D, 0, fk.RQKernel(-0.459219, -0.077250, -2.212718))

    kernel = rq_outer() * se_dim9() * (periodic_dim0() + rq_inner())
    # kernel_1 is constructed but not used further in this function.
    kernel_1 = rq_outer() * se_dim9() * periodic_dim0()
    kernel_2 = rq_outer() * se_dim9() * rq_inner()

    min_T = -3.0
    max_T = 1.0
    N_T = 10
    n_rows = 499

    grid = np.linspace(min_T, max_T, N_T)
    temps = np.repeat(grid, n_rows)
    # np.tile returns a new array, so overwriting column 9 leaves X intact.
    grid_inputs = np.tile(X[0:n_rows, :], (N_T, 1))
    grid_inputs[:, 9] = temps

    posterior_mean = gpml.posterior_mean(
        kernel, kernel_2, X[0:n_rows, :], y[0:n_rows], grid_inputs, iters=300)

    data = {
        'X': X[0:n_rows, 0],
        'Y': np.linspace(min_T, max_T, N_T),
        'Z': np.reshape(posterior_mean, (N_T, n_rows), 'A'),
        'post_mean': posterior_mean,
    }
    scipy.io.savemat('temp_data.mat', data)
def fear_experiment(data_file, results_filename, y_dim=1, subset=None,
                    max_depth=2, k=2, verbose=True, sleep_time=60,
                    n_sleep_timeout=20, re_submit_wait=60, description=''):
    '''Greedily search for the best kernel and log every level to a file.

    Parameters:
        data_file: path handed to fear_load_mat.
        results_filename: text file that receives the per-level results.
        y_dim: second argument to fear_load_mat; also echoed in the header.
        subset: optional row index applied to X and y; None keeps all rows.
        max_depth: number of search levels (integer).
        k: number of best new kernels used as seeds for the next level.
        verbose, sleep_time, n_sleep_timeout, re_submit_wait: forwarded
            unchanged to fear_run_experiments (verbose also goes to
            fear_expand_kernels).
        description: free text written into the results-file header.

    Level 0 scores the seed kernels themselves; later levels score
    fear_expand_kernels(D, seed_kernels).  After each level the cumulative
    results are printed (largest BIC first) and a snapshot is stored, so
    each level in the output file lists everything found up to that level.
    '''
    X, y, D = fear_load_mat(data_file, y_dim)

    # Subset if necessary
    if subset is not None:
        X = X[subset, :]
        y = y[subset]

    ##### This should be abstracted
    base_factories = (
        lambda: fk.SqExpKernel(0., 0.),
        lambda: fk.SqExpPeriodicKernel(0., 0., 0.),
        lambda: fk.RQKernel(0., 0., 0.),
    )
    seed_kernels = [fk.MaskKernel(D, i, make_base())
                    for make_base in base_factories
                    for i in range(D)]

    # Result tuples are (kernel, nll, laplace, BIC); candidates rank on BIC.
    BIC_key = 3
    active_key = BIC_key

    def score(entry):
        return entry[active_key]

    results = []
    results_sequence = []
    for depth in range(max_depth):
        if depth == 0:
            candidates = seed_kernels
        else:
            candidates = fear_expand_kernels(D, seed_kernels, verbose=verbose)
        new_results = fear_run_experiments(
            candidates, X, y, verbose=verbose, sleep_time=sleep_time,
            n_sleep_timeout=n_sleep_timeout, re_submit_wait=re_submit_wait)

        results = results + new_results
        print('')
        results = sorted(results, key=score, reverse=True)
        for kernel, nll, laplace, BIC in results:
            print('%s %s %s %s' % (nll, laplace, BIC, kernel.pretty_print()))

        # Ascending sort, so the first k entries have the smallest score.
        seed_kernels = [entry[0]
                        for entry in sorted(new_results, key=score)[0:k]]
        results_sequence.append(results)

    # Write results to a file
    with open(results_filename, 'w') as outfile:
        # max_depth and k are integers, so they are formatted with %d.
        outfile.write(
            'Experiment results for\n datafile = %s\n y_dim = %d\n subset = %s\n max_depth = %d\n k = %d\n Description = %s\n\n'
            % (data_file, y_dim, subset, max_depth, k, description))
        for level, level_results in enumerate(results_sequence):
            outfile.write('\n%%%%%%%%%% Level %d %%%%%%%%%%\n\n' % level)
            for kernel, nll, laplace, BIC in level_results:
                outfile.write('nll=%f, laplace=%f, BIC=%f, kernel=%s\n'
                              % (nll, laplace, BIC, kernel.__repr__()))
def canonical(kernel):
    '''Return a new kernel tree equal to `kernel` in canonical form.

    Base kernels are copied.  Mask kernels are rebuilt around the canonical
    form of their base kernel.  For sum and product kernels, every operand
    is canonicalised, operands of the same composite type are spliced into
    the parent's operand list, and the operands are sorted.

    Raises RuntimeError if the kernel class is not recognised.
    '''
    if isinstance(kernel, fk.BaseKernel):
        return kernel.copy()

    if isinstance(kernel, fk.MaskKernel):
        return fk.MaskKernel(kernel.ndim, kernel.active_dimension,
                             canonical(kernel.base_kernel))

    # Sum and product kernels share the same flatten-then-sort treatment.
    for composite in (fk.SumKernel, fk.ProductKernel):
        if not isinstance(kernel, composite):
            continue
        flattened = []
        for operand in kernel.operands:
            canon = canonical(operand)
            if isinstance(operand, composite):
                flattened.extend(canon.operands)
            else:
                flattened.append(canon)
        return composite(sorted(flattened))

    raise RuntimeError('Unknown kernel class:', kernel.__class__)
def plot_gef_load_Z01():
    '''Plot y against column 0 of X for the leading rows of the GEF data.

    A sample from the GP prior of a fixed kernel is also drawn via
    gpml.sample_from_gp_prior; the sample is not used by the plot.
    '''
    # This kernel was chosen from a run of gef_load datapoints.
    # kernel = eval(ProductKernel([
    #     covMask(ndim=12, active_dimension=0, base_kernel=RQKernel(lengthscale=0.268353, output_variance=-0.104149, alpha=-2.105742)),
    #     covMask(ndim=12, active_dimension=9, base_kernel=SqExpKernel(lengthscale=1.160242, output_variance=0.004344)),
    #     SumKernel([
    #         covMask(ndim=12, active_dimension=0, base_kernel=SqExpPeriodicKernel(lengthscale=-0.823413, period=0.000198, output_variance=-0.917064)),
    #         covMask(ndim=12, active_dimension=0, base_kernel=RQKernel(lengthscale=-0.459219, output_variance=-0.077250, alpha=-2.212718)) ]) ]))
    X, y, D = fear_load_mat('../data/gef_load_full_Xy.mat', 1)

    rq_outer = fk.MaskKernel(D, 0, fk.RQKernel(0.268353, -0.104149, -2.105742))
    se_dim9 = fk.MaskKernel(D, 9, fk.SqExpKernel(1.160242, 0.004344))
    product_head = rq_outer * se_dim9
    periodic_dim0 = fk.MaskKernel(
        D, 0, fk.SqExpPeriodicKernel(-0.823413, 0.000198, -0.917064))
    rq_inner = fk.MaskKernel(D, 0, fk.RQKernel(-0.459219, -0.077250, -2.212718))
    kernel = product_head * (periodic_dim0 + rq_inner)

    # NOTE: the slice 0:499 selects 499 rows although the title says 500.
    n_rows = 499

    # Todo: set random seed.
    sample = gpml.sample_from_gp_prior(kernel, X[0:n_rows, :])

    pylab.figure()
    pylab.plot(X[0:n_rows, 0], y[0:n_rows])
    pylab.title('GEFCom2012 Z01 and T09 - first 500 data points')
    pylab.xlabel('Time')
    pylab.ylabel('Load')
def plot_gef_load_Z01_split_mean_temp():
    '''Plot the observed load and a posterior mean against column 9 of X.

    gpml.posterior_mean is called twice with the full kernel, once per
    product component (kernel_1, then kernel_2).  Only the second result is
    plotted, alongside the raw y values, and the figure is then shown.
    '''
    X, y, D = fear_load_mat('../data/gef_load_full_Xy.mat', 1)
    n_rows = 499

    # Factories give every composite kernel its own fresh factor objects.
    def rq_outer():
        return fk.MaskKernel(D, 0, fk.RQKernel(0.268353, -0.104149, -2.105742))

    def se_dim9():
        return fk.MaskKernel(D, 9, fk.SqExpKernel(1.160242, 0.004344))

    def periodic_dim0():
        return fk.MaskKernel(
            D, 0, fk.SqExpPeriodicKernel(-0.823413, 0.000198, -0.917064))

    def rq_inner():
        return fk.MaskKernel(D, 0, fk.RQKernel(-0.459219, -0.077250, -2.212718))

    kernel = rq_outer() * se_dim9() * (periodic_dim0() + rq_inner())

    kernel_1 = rq_outer() * se_dim9() * periodic_dim0()
    # posterior_mean_1 is computed but not plotted below.
    posterior_mean_1 = gpml.posterior_mean(
        kernel, kernel_1, X[0:n_rows, :], y[0:n_rows], iters=10)

    kernel_2 = rq_outer() * se_dim9() * rq_inner()
    posterior_mean_2 = gpml.posterior_mean(
        kernel, kernel_2, X[0:n_rows, :], y[0:n_rows], iters=10)

    plt.figure()
    host = host_subplot(111, axes_class=AA.Axes)
    plt.subplots_adjust(right=0.85)

    # The twin axis is created, but all of its styling is commented out.
    par1 = host.twinx()

    # host.set_xlim(0, 2)
    # host.set_ylim(0, 2)

    host.set_xlabel("Temperature (T09)")
    # par1.set_ylabel("Periodic component")
    plt.title('Posterior mean function')
    host.set_ylabel("Load posterior mean")

    column_9 = X[0:n_rows, 9]
    p2, = host.plot(column_9, y[0:n_rows], 'o', alpha=0.5)
    p1, = host.plot(column_9, posterior_mean_2, 'o')
    # par1.set_ylim(0, 4)

    host.legend()

    host.axis["left"].label.set_color(p1.get_color())
    # par1.axis["right"].label.set_color(p2.get_color())

    plt.draw()
    plt.show()
def full_gef_load_experiment(zone=1, max_depth=5, verbose=True):
    '''Run the greedy kernel search on the full (12-dimensional) GEF data.

    zone selects the target column of y (1-based).  max_depth is the number
    of search rounds.  verbose is forwarded to
    structure_search.try_expanded_kernels.

    The first round scores the seed kernels with expand=False; later rounds
    pass expand=True.  After every round all results so far are printed
    (largest BIC first) and the k new results with the smallest BIC become
    the next seeds.
    '''
    n_dims = 12

    # Seeds: every SqExp mask first, then every periodic, then every RQ.
    base_factories = (
        lambda: fk.SqExpKernel(0., 0.),
        lambda: fk.SqExpPeriodicKernel(0., 0., 0.),
        lambda: fk.RQKernel(0., 0., 0.),
    )
    seed_kernels = [fk.MaskKernel(12, i, make_base())
                    for make_base in base_factories
                    for i in range(n_dims)]

    X, y = load_full_gef_load()
    # Subsample: leading rows only, and the target column for this zone.
    X = X[0:299, :]
    y = y[0:299, zone - 1]

    k = 2  # Expand k best

    # Result tuples are laid out as (kernel, nll, BIC); ranking uses BIC.
    nll_key, BIC_key = 1, 2
    active_key = BIC_key

    def score(entry):
        return entry[active_key]

    results = []
    for depth in range(max_depth):
        new_results = structure_search.try_expanded_kernels(
            X, y, D=12, seed_kernels=seed_kernels, expand=(depth > 0),
            verbose=verbose)
        results = results + new_results

        print('')
        results = sorted(results, key=score, reverse=True)
        for kernel, nll, BIC in results:
            print('%s %s %s' % (nll, BIC, kernel.pretty_print()))

        # Ascending sort, so the first k entries have the smallest score.
        best_new = sorted(new_results, key=score)[0:k]
        seed_kernels = [entry[0] for entry in best_new]
def expand(kernel, grammar):
    '''Return a list of expansions of `kernel` under `grammar`.

    The list starts with expand_single_tree(kernel, grammar).  For a mask
    kernel it also holds the mask re-wrapped around each expansion of its
    base kernel.  For a sum or product kernel it also holds, for every
    operand and every expansion of that operand, a new composite in which
    only that operand is replaced (all operands are copied).

    Raises RuntimeError if the kernel class is not recognised.
    '''
    result = expand_single_tree(kernel, grammar)

    if isinstance(kernel, fk.BaseKernel):
        return result

    if isinstance(kernel, fk.MaskKernel):
        result.extend([
            fk.MaskKernel(kernel.ndim, kernel.active_dimension, inner)
            for inner in expand(kernel.base_kernel, grammar)
        ])
        return result

    if isinstance(kernel, fk.SumKernel):
        composite = fk.SumKernel
    elif isinstance(kernel, fk.ProductKernel):
        composite = fk.ProductKernel
    else:
        raise RuntimeError('Unknown kernel class:', kernel.__class__)

    for position, operand in enumerate(kernel.operands):
        for replacement in expand(operand, grammar):
            candidate_ops = (kernel.operands[:position] + [replacement]
                             + kernel.operands[position + 1:])
            result.append(composite([part.copy() for part in candidate_ops]))
    return result
def kernel_test():
    '''Print three renderings of a masked squared-exponential kernel.

    Prints the GPML kernel expression, the pretty-printed form and the
    parameter vector of the kernel, followed by a completion message.
    '''
    masked = fk.MaskKernel(4, 3, fk.SqExpKernel(0, 0))

    print(masked.gpml_kernel_expression())
    print(masked.pretty_print())

    params = masked.param_vector()
    print('[%s]' % params)

    print('kernel_test complete')