def fit_model(self, state_list, action_list, win_in, verbose=True,
              max_iter=1000):
    """
    Fit a Gaussian Process regression model to the supplied state / action
    pairs.

    The fitted model acts as a stand-in for the environment, so that policy
    optimization can query the model rather than the environment itself.
    Other GP flavours (Sparse GP, Stochastic Variational Inference Sparse GP)
    are available in mxfusion.gp_modules.

    :param state_list: forwarded unchanged to ``self.prepare_data``.
    :param action_list: forwarded unchanged to ``self.prepare_data``.
    :param win_in: forwarded unchanged to ``self.prepare_data``.
    :param verbose: passed through to the inference run.
    :param max_iter: passed through to the inference run as its iteration
        limit.
    :return: tuple ``(model, inference, X, Y)`` where ``X`` and ``Y`` are the
        arrays returned by ``self.prepare_data``.
    """
    X, Y = self.prepare_data(state_list, action_list, win_in)

    # The last axis of X / Y fixes the input / output width of the GP.
    n_inputs = X.shape[-1]
    n_outputs = Y.shape[-1]

    # Components are attached in a fixed order: N, X, noise, kernel, Y.
    model = Model()
    model.N = Variable()
    model.X = Variable(shape=(model.N, n_inputs))
    model.noise_var = Variable(
        shape=(1,),
        transformation=PositiveTransformation(),
        initial_value=0.01,
    )
    model.kernel = RBF(
        input_dim=n_inputs,
        variance=1,
        lengthscale=1,
        ARD=True,
    )
    model.Y = GPRegression.define_variable(
        X=model.X,
        kernel=model.kernel,
        noise_var=model.noise_var,
        shape=(model.N, n_outputs),
    )
    # Override the jitter used by the GP log-pdf computation.
    model.Y.factor.gp_log_pdf.jitter = 1e-6

    # MAP estimation with both X and Y treated as observed.
    map_algorithm = MAP(model=model, observed=[model.X, model.Y])
    inference = GradBasedInference(inference_algorithm=map_algorithm)

    x_data = mx.nd.array(X)
    y_data = mx.nd.array(Y)
    inference.run(
        X=x_data,
        Y=y_data,
        max_iter=max_iter,
        learning_rate=0.1,
        verbose=verbose,
    )

    return model, inference, X, Y
def make_gpregr_model(self, lengthscale, variance, noise_var):
    """
    Build (without fitting) a float64 GP regression model with an ARD RBF
    kernel, a 3-column input variable and a 1-column output variable.

    :param lengthscale: initial RBF lengthscale; converted with
        ``mx.nd.array(..., dtype='float64')``.
    :param variance: initial RBF variance; converted the same way.
    :param noise_var: initial observation-noise variance; converted the same
        way and attached with a positive transformation.
    :return: the constructed model; no inference is run here.
    """
    from mxfusion.models import Model
    from mxfusion.components.variables import Variable, PositiveTransformation
    from mxfusion.modules.gp_modules import GPRegression
    from mxfusion.components.distributions.gp.kernels import RBF

    float_type = 'float64'

    model = Model()
    model.N = Variable()
    model.X = Variable(shape=(model.N, 3))

    # The noise variance is attached with a positive transformation.
    noise_init = mx.nd.array(noise_var, dtype=float_type)
    model.noise_var = Variable(
        transformation=PositiveTransformation(),
        initial_value=noise_init,
    )

    variance_init = mx.nd.array(variance, dtype=float_type)
    lengthscale_init = mx.nd.array(lengthscale, dtype=float_type)
    rbf_kernel = RBF(
        input_dim=3,
        ARD=True,
        variance=variance_init,
        lengthscale=lengthscale_init,
        dtype=float_type,
    )

    model.Y = GPRegression.define_variable(
        X=model.X,
        kernel=rbf_kernel,
        noise_var=model.noise_var,
        shape=(model.N, 1),
        dtype=float_type,
    )
    return model