示例#1
0
def debug_laplace():
    # Load data set
    X, y, D, Xtest, ytest = gpml.load_mat(
        '../data/kfold_data/r_concrete_500_fold_10_of_10.mat', y_dim=1)
    # Load the suspicious kernel
    sk = fk.repr_string_to_kernel(
        'ScoredKernel(k_opt=ProductKernel([ MaskKernel(ndim=8, active_dimension=0, base_kernel=CubicKernel(offset=1.757755, output_variance=7.084045)), MaskKernel(ndim=8, active_dimension=7, base_kernel=SqExpPeriodicKernel(lengthscale=-2.701080, period=-0.380918, output_variance=-0.071214)) ]), nll=6348.096611, laplace_nle=-184450132.068237, bic_nle=12720.630212, noise=[-1.77276072])'
    )
    # Create some code to evaluate it
    if X.ndim == 1: X = X[:, nax]
    if y.ndim == 1: y = y[:, nax]
    ndata = y.shape[0]

    # Create data file
    data_file = cblparallel.create_temp_file('.mat')
    scipy.io.savemat(data_file, {'X': X, 'y': y})  # Save regression data

    # Move to fear
    cblparallel.copy_to_remote(data_file)
    scripts = [
        gpml.OPTIMIZE_KERNEL_CODE % {
            'datafile':
            data_file.split('/')[-1],
            'writefile':
            '%(output_file)s',  # N.B. cblparallel manages output files
            'gpml_path':
            cblparallel.gpml_path(local_computation=False),
            'kernel_family':
            sk.k_opt.gpml_kernel_expression(),
            'kernel_params':
            '[ %s ]' % ' '.join(str(p) for p in sk.k_opt.param_vector()),
            'noise':
            str(sk.noise),
            'iters':
            str(300)
        }
    ]
    #### Need to be careful with % signs
    #### For the moment, cblparallel expects no single % signs - FIXME
    scripts[0] = re.sub('% ', '%% ', scripts[0])

    # Test

    scripts[0] = re.sub('delta = 1e-6', 'delta = 1e-6', scripts[0])
    #scripts[0] = re.sub('hyp.lik = [-1.77276072]', 'hyp.lik = [-0.77276072]', scripts[0])

    output_file = cblparallel.run_batch_on_fear(scripts,
                                                language='matlab',
                                                max_jobs=600)[0]

    # Read in results
    output = gpml.read_outputs(output_file)
    result = ScoredKernel.from_matlab_output(output, sk.k_opt.family(), ndata)
    print result
    print output.hessian

    os.remove(output_file)
    # Remove temporary data file (perhaps on the cluster server)
    cblparallel.remove_temp_file(data_file, local_computation=False)
示例#2
0
def debug_laplace():
    # Load data set
    X, y, D, Xtest, ytest = gpml.load_mat('../data/kfold_data/r_concrete_500_fold_10_of_10.mat', y_dim=1)
    # Load the suspicious kernel
    sk = fk.repr_string_to_kernel('ScoredKernel(k_opt=ProductKernel([ MaskKernel(ndim=8, active_dimension=0, base_kernel=CubicKernel(offset=1.757755, output_variance=7.084045)), MaskKernel(ndim=8, active_dimension=7, base_kernel=SqExpPeriodicKernel(lengthscale=-2.701080, period=-0.380918, output_variance=-0.071214)) ]), nll=6348.096611, laplace_nle=-184450132.068237, bic_nle=12720.630212, noise=[-1.77276072])')
    # Create some code to evaluate it
    if X.ndim == 1: X = X[:, nax]
    if y.ndim == 1: y = y[:, nax]
    ndata = y.shape[0]  
    
    
    # Create data file
    data_file = cblparallel.create_temp_file('.mat')
    scipy.io.savemat(data_file, {'X': X, 'y': y}) # Save regression data
    
    # Move to fear
    cblparallel.copy_to_remote(data_file)
    scripts = [gpml.OPTIMIZE_KERNEL_CODE % {'datafile': data_file.split('/')[-1],
                                              'writefile': '%(output_file)s', # N.B. cblparallel manages output files
                                              'gpml_path': cblparallel.gpml_path(local_computation=False),
                                              'kernel_family': sk.k_opt.gpml_kernel_expression(),
                                              'kernel_params': '[ %s ]' % ' '.join(str(p) for p in sk.k_opt.param_vector()),
                                              'noise': str(sk.noise),
                                              'iters': str(300)}]
    #### Need to be careful with % signs
    #### For the moment, cblparallel expects no single % signs - FIXME
    scripts[0] = re.sub('% ', '%% ', scripts[0])
    
    # Test
    
    scripts[0] = re.sub('delta = 1e-6', 'delta = 1e-6', scripts[0])
    #scripts[0] = re.sub('hyp.lik = [-1.77276072]', 'hyp.lik = [-0.77276072]', scripts[0])
    
    output_file = cblparallel.run_batch_on_fear(scripts, language='matlab', max_jobs=600)[0]  
    
    # Read in results
    output = gpml.read_outputs(output_file)
    result = ScoredKernel.from_matlab_output(output, sk.k_opt.family(), ndata)
    print result
    print output.hessian
    
    os.remove(output_file)
    # Remove temporary data file (perhaps on the cluster server)
    cblparallel.remove_temp_file(data_file, local_computation=False)
示例#3
0
def evaluate_kernels(kernels,
                     X,
                     y,
                     verbose=True,
                     noise=None,
                     iters=300,
                     local_computation=False,
                     zip_files=False,
                     max_jobs=500,
                     zero_mean=False,
                     random_seed=0):
    '''
    Sets up the kernel optimisation and nll calculation experiments, returns the results as scored kernels
    Input:
     - kernels           - A list of kernels (i.e. not scored kernels)
     - X                 - A matrix (data_points x dimensions) of input locations
     - y                 - A matrix (data_points x 1) of output values
     - ...
    Return:
     - A list of ScoredKernel objects
    '''

    # Make data into matrices in case they're unidimensional.
    if X.ndim == 1: X = X[:, nax]
    if y.ndim == 1: y = y[:, nax]
    ndata = y.shape[0]

    # Set default noise using a heuristic.
    if noise is None:
        noise = np.log(np.var(y) / 10)

    # Create data file
    if verbose:
        print 'Creating data file locally'
    data_file = cblparallel.create_temp_file('.mat')
    scipy.io.savemat(data_file, {'X': X, 'y': y})  # Save regression data

    # Move to fear if necessary
    if not local_computation:
        if verbose:
            print 'Moving data file to fear'
        cblparallel.copy_to_remote(data_file)

    # Create a list of MATLAB scripts to assess and optimise parameters for each kernel
    if verbose:
        print 'Creating scripts'
    scripts = [None] * len(kernels)
    for (i, kernel) in enumerate(kernels):
        x = kernel.param_vector()
        parameters = {
            'datafile':
            data_file.split('/')[-1],
            'writefile':
            '%(output_file)s',  # N.B. cblparallel manages output files
            'gpml_path':
            cblparallel.gpml_path(local_computation),
            'kernel_family':
            kernel.gpml_kernel_expression(),
            'kernel_params':
            '[ %s ]' % ' '.join(str(p) for p in x if type(p) != list),
            'eff_dimensions':
            '[ %s ]' % ';'.join(str(p) for p in x if type(p) == list),
            'dim_positions':
            '[ %s ]' %
            ' '.join(str(i) for i in range(len(x)) if type(x[i]) == list),
            'noise':
            str(noise),
            'iters':
            str(iters),
            'seed':
            str(random_seed)
        }
        print parameters['kernel_params'], parameters[
            'eff_dimensions'], parameters['dim_positions']
        if zero_mean:
            scripts[i] = gpml.OPTIMIZE_KERNEL_CODE_ZERO_MEAN % parameters
        else:
            scripts[i] = gpml.OPTIMIZE_KERNEL_CODE % parameters
        #### Need to be careful with % signs
        #### For the moment, cblparallel expects no single % signs - FIXME
        scripts[i] = re.sub('% ', '%% ', scripts[i])

    # Send to cblparallel and save output_files
    if verbose:
        print 'Sending scripts to cblparallel'
    if local_computation:
        output_files = cblparallel.run_batch_locally(scripts,
                                                     language='matlab',
                                                     max_cpu=1.1,
                                                     job_check_sleep=5,
                                                     submit_sleep=0.1,
                                                     max_running_jobs=10,
                                                     verbose=verbose)
    else:
        output_files = cblparallel.run_batch_on_fear(scripts,
                                                     language='matlab',
                                                     max_jobs=max_jobs,
                                                     verbose=verbose,
                                                     zip_files=zip_files)

    # Read in results
    results = [None] * len(kernels)
    for (i, output_file) in enumerate(output_files):
        if verbose:
            print 'Reading output file %d of %d: %s' % (i + 1, len(kernels),
                                                        output_file)
        results[i] = ScoredKernel.from_matlab_output(
            gpml.read_outputs(output_file), kernels[i].family(), ndata)

    # Tidy up local output files
    for (i, output_file) in enumerate(output_files):
        if verbose:
            print 'Removing output file %d of %d' % (i + 1, len(kernels))
        os.remove(output_file)
    # Remove temporary data file (perhaps on the cluster server)
    cblparallel.remove_temp_file(data_file, local_computation)

    # Return results i.e. list of ScoredKernel objects
    return results
def evaluate_kernels(kernels, X, y, verbose=True, noise=None, iters=300, local_computation=False, zip_files=False, max_jobs=500, zero_mean=False, random_seed=0):
    '''
    Sets up the kernel optimisation and nll calculation experiments, returns the results as scored kernels
    Input:
     - kernels           - A list of kernels (i.e. not scored kernels)
     - X                 - A matrix (data_points x dimensions) of input locations
     - y                 - A matrix (data_points x 1) of output values
     - ...
    Return:
     - A list of ScoredKernel objects
    '''
   
    # Make data into matrices in case they're unidimensional.
    if X.ndim == 1: X = X[:, nax]
    if y.ndim == 1: y = y[:, nax]
    ndata = y.shape[0]
        
    # Set default noise using a heuristic.    
    if noise is None:
        noise = np.log(np.var(y)/10)
    
    # Create data file
    if verbose:
        print 'Creating data file locally'
    data_file = cblparallel.create_temp_file('.mat')
    scipy.io.savemat(data_file, {'X': X, 'y': y}) # Save regression data
    
    # Move to fear if necessary
    if not local_computation:
        if verbose:
            print 'Moving data file to fear'
        cblparallel.copy_to_remote(data_file)
    
    # Create a list of MATLAB scripts to assess and optimise parameters for each kernel
    if verbose:
        print 'Creating scripts'
    scripts = [None] * len(kernels)
    for (i, kernel) in enumerate(kernels):
        parameters = {'datafile': data_file.split('/')[-1],
                      'writefile': '%(output_file)s', # N.B. cblparallel manages output files
                      'gpml_path': cblparallel.gpml_path(local_computation),
                      'kernel_family': kernel.gpml_kernel_expression(),
                      'kernel_params': '[ %s ]' % ' '.join(str(p) for p in kernel.param_vector()),
                      'noise': str(noise),
                      'iters': str(iters),
                      'seed': str(random_seed)}
        if zero_mean:
            scripts[i] = gpml.OPTIMIZE_KERNEL_CODE_ZERO_MEAN % parameters
        else:
            scripts[i] = gpml.OPTIMIZE_KERNEL_CODE % parameters
        #### Need to be careful with % signs
        #### For the moment, cblparallel expects no single % signs - FIXME
        scripts[i] = re.sub('% ', '%% ', scripts[i])
    
    # Send to cblparallel and save output_files
    if verbose:
        print 'Sending scripts to cblparallel'
    if local_computation:
        output_files = cblparallel.run_batch_locally(scripts, language='matlab', max_cpu=1.1, job_check_sleep=5, submit_sleep=0.1, max_running_jobs=10, verbose=verbose)  
    else:
        output_files = cblparallel.run_batch_on_fear(scripts, language='matlab', max_jobs=max_jobs, verbose=verbose, zip_files=zip_files)  
    
    # Read in results
    results = [None] * len(kernels)
    for (i, output_file) in enumerate(output_files):
        if verbose:
            print 'Reading output file %d of %d' % (i + 1, len(kernels))
        results[i] = ScoredKernel.from_matlab_output(gpml.read_outputs(output_file), kernels[i].family(), ndata)
    
    # Tidy up local output files
    for (i, output_file) in enumerate(output_files):
        if verbose:
            print 'Removing output file %d of %d' % (i + 1, len(kernels)) 
        os.remove(output_file)
    # Remove temporary data file (perhaps on the cluster server)
    cblparallel.remove_temp_file(data_file, local_computation)
    
    # Return results i.e. list of ScoredKernel objects
    return results