def __init__(self, sensor_models, calibration_model, lr=1e-4, batch_size=20, log_dir=None, **kwargs):
    """Build the joint sensor-calibration TF graph.

    Args:
        sensor_models: dict mapping board id -> per-board sensor network
            (each maps a [N, 3] sensor reading to a representation).
        calibration_model: network mapping concat(representation, env)
            to a [N, 2] calibration target.
        lr: default learning rate (stored; actual rate is fed via placeholder).
        batch_size: minibatch size used by training loops elsewhere.
        log_dir: optional directory for TF summaries.
        **kwargs: ignored.
    """
    self.graph = T.core.Graph()
    self.log_dir = log_dir
    with self.graph.as_default():
        self.calibration_model = calibration_model
        # Fix an ordering of the boards so each gets a stable integer index.
        self.board_ids = list(sensor_models.keys())
        self.board_map = {b: i for i, b in enumerate(self.board_ids)}
        self.sensor_map = sensor_models
        self.sensor_models = [
            sensor_models[board_id] for board_id in self.board_ids
        ]
        # Serialized architecture so the model can be reloaded elsewhere
        # (see the __main__ script that calls pickle.loads on this).
        self.architecture = pickle.dumps(
            [sensor_models, calibration_model])
        self.batch_size = batch_size
        self.lr = lr
        self.learning_rate = T.placeholder(T.floatx(), [])
        # Inputs: raw sensor triple, environment triple, and per-row board index.
        self.sensors = T.placeholder(T.floatx(), [None, 3])
        self.env = T.placeholder(T.floatx(), [None, 3])
        self.board = T.placeholder(T.core.int32, [None])
        # Pair each row's board index with its row number so gather_nd can
        # pick, for row i, the output of board board[i]'s sensor model.
        self.boards = T.transpose(
            T.pack([self.board, T.range(T.shape(self.board)[0])]))
        # Run every sensor model on the full batch, then select per-row.
        self.rep = T.gather_nd(
            T.pack([
                sensor_model(self.sensors)
                for sensor_model in self.sensor_models
            ]), self.boards)
        # Placeholder for feeding a precomputed representation directly.
        self.rep_ = T.placeholder(T.floatx(),
                                  [None, self.rep.get_shape()[-1]])
        rep_env = T.concat([self.rep, self.env], -1)
        rep_env_ = T.concat([self.rep_, self.env], -1)
        self.y_ = self.calibration_model(rep_env)
        # Same calibration applied to the externally supplied representation.
        self.y_rep = self.calibration_model(rep_env_)
        self.y = T.placeholder(T.floatx(), [None, 2])
        self.loss = T.mean((self.y - self.y_)**2)
        self.mae = T.mean(T.abs(self.y - self.y_))
        T.core.summary.scalar('MSE', self.loss)
        T.core.summary.scalar('MAE', self.mae)
        self.summary = T.core.summary.merge_all()
        self.train_op = T.core.train.AdamOptimizer(
            self.learning_rate).minimize(self.loss)
    # Session bound to the graph built above.
    self.session = T.interactive_session(graph=self.graph)
def log_prior(self, leaf_values):
    """Mean log-density of `leaf_values` under a standard-normal prior."""
    # Standard normal: zero mean, unit scale, shaped like the leaves.
    prior_mean = T.zeros_like(leaf_values, dtype='float32')
    prior_scale = T.ones_like(leaf_values, dtype='float32')
    densities = log_normal(leaf_values, prior_mean, prior_scale,
                           self.embedding_size, dim=2)
    return T.mean(densities)
def kl_divergence(self, q_X, q_A, _):
    """KL term of the ELBO for state posterior q_X given action posterior q_A.

    Results are memoized in self.cache keyed on (q_X, q_A); the third
    argument (num_data in sibling implementations) is unused here.
    Returns (kl, info) where info carries diagnostic tensors.
    """
    # q_Xt - [N, H, ds]
    # q_At - [N, H, da]
    if (q_X, q_A) not in self.cache:
        info = {}
        if self.smooth:
            # Smoothing: compare q_X against the full LDS prior.
            state_prior = stats.GaussianScaleDiag([
                T.ones(self.ds),
                T.zeros(self.ds)
            ])
            p_X = stats.LDS(
                (self.sufficient_statistics(), state_prior, None,
                 q_A.expected_value(), self.horizon), 'internal')
            kl = T.mean(stats.kl_divergence(q_X, p_X), axis=0)
            # Q is the dynamics noise covariance; report its stdev.
            Q = self.get_dynamics()[1]
            info['model-stdev'] = T.sqrt(T.matrix_diag_part(Q))
        else:
            # Non-smoothing: one-step prediction KL. Slice off the last
            # timestep to get (x_t, a_t), push through the dynamics, and
            # compare against the posterior at t+1.
            q_Xt = q_X.__class__([
                q_X.get_parameters('regular')[0][:, :-1],
                q_X.get_parameters('regular')[1][:, :-1],
            ])
            q_At = q_A.__class__([
                q_A.get_parameters('regular')[0][:, :-1],
                q_A.get_parameters('regular')[1][:, :-1],
            ])
            p_Xt1 = self.forward(q_Xt, q_At)
            q_Xt1 = q_X.__class__([
                q_X.get_parameters('regular')[0][:, 1:],
                q_X.get_parameters('regular')[1][:, 1:],
            ])
            # NOTE(review): get_parameters('regular')[1] is presumably the
            # mean parameter — confirm against the stats library.
            rmse = T.sqrt(
                T.sum(T.square(q_Xt1.get_parameters('regular')[1]
                               - p_Xt1.get_parameters('regular')[1]),
                      axis=-1))
            kl = T.mean(
                T.sum(stats.kl_divergence(q_Xt1, p_Xt1), axis=-1), axis=0)
            Q = self.get_dynamics()[1]
            model_stdev = T.sqrt(T.matrix_diag_part(Q))
            info['rmse'] = rmse
            info['model-stdev'] = model_stdev
        self.cache[(q_X, q_A)] = kl, info
    return self.cache[(q_X, q_A)]
def __init__(self, features, model=None, batch_size=20, lr=1e-4):
    """Wrap a feed-forward `model` in its own TF graph for regression.

    Args:
        features: feature spec forwarded to the base class.
        model: deepx-style network mapping [N, 6] -> [N, 2], e.g.
            Relu(6, 200) >> Relu(200) >> Relu(200) >> Relu(200) >> Linear(2)
        batch_size: minibatch size used by the training loop.
        lr: Adam learning rate (fixed at graph-build time).
    """
    super(NeuralNetwork, self).__init__(features)
    self.graph = T.core.Graph()
    with self.graph.as_default():
        # Pickled so the architecture can be restored from a saved model.
        self.architecture = pickle.dumps(model)
        self.model = model  # Relu(6, 200) >> Relu(200) >> Relu(200) >> Relu(200) >> Linear(2)
        self.batch_size = batch_size
        self.lr = lr
        self.X = T.placeholder(T.floatx(), [None, 6])
        self.y = T.placeholder(T.floatx(), [None, 2])
        self.y_ = self.model(self.X)
        # Mean-squared-error regression loss.
        self.loss = T.mean((self.y - self.y_) ** 2)
        self.train_op = T.core.train.AdamOptimizer(self.lr).minimize(self.loss)
    self.session = T.interactive_session(graph=self.graph)
# NOTE(review): this excerpt begins mid-loop — the `for i in ...` header that
# binds `i` (and the definitions of X, y, labels, idx, N) is above this chunk.
y[i, labels[i]] = 1  # one-hot encode label i
# 90/10 train/test split over the shuffled indices.
split = int(0.9 * N)
train_idx, test_idx = idx[:split], idx[split:]
Xtrain, Xtest = X[train_idx], X[test_idx]
ytrain, ytest = y[train_idx], y[test_idx]
# Small convnet over 28x28x1 images (MNIST-shaped input), 10 classes.
X_in = T.placeholder(T.floatx(), [None, 28, 28, 1])
Y_in = T.placeholder(T.floatx(), [None, 10])
conv_net = Conv((2, 2, 10)) >> Conv((2, 2, 20)) >> Flatten() >> Linear(10)
logits = conv_net(X_in)
predictions = T.argmax(logits, -1)
# NOTE(review): mixes raw `tf` with the `T` backend wrapper — works only when
# T is backed by the same TensorFlow instance.
loss = T.mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y_in))
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
sess = T.interactive_session()

def train(n_iter, batch_size=20):
    # SGD loop over random minibatches; prints the loss each step.
    for i in range(n_iter):
        idx = np.random.permutation(Xtrain.shape[0])[:batch_size]
        result = sess.run([loss, train_op], {
            X_in : Xtrain[idx],
            Y_in : ytrain[idx]
        })
        print("Loss:", result[0])

# NOTE(review): statement order preserved from the original — the test error is
# evaluated BEFORE train(1000) runs; presumably these lines were re-run
# interactively after training.
preds = sess.run(predictions, { X_in : Xtest }).astype(np.int32)
print("Error: ", 1 - (preds == labels[test_idx]).sum() / float(N - split))
train(1000)
def kl_divergence(self, q_X, q_A, num_data):
    """KL term of the ELBO, including a global KL on the dynamics parameters.

    The global KL (variational dynamics vs. its prior) is amortized by
    `num_data`; results are memoized in self.cache keyed on (q_X, q_A).
    Returns (prior_kl, info) where info carries diagnostic tensors.
    """
    if (q_X, q_A) not in self.cache:
        if self.smooth:
            # Smoothing: KL between q_X and the full LDS prior.
            state_prior = stats.GaussianScaleDiag(
                [T.ones(self.ds), T.zeros(self.ds)])
            self.p_X = stats.LDS(
                (self.sufficient_statistics(), state_prior, None,
                 q_A.expected_value(), self.horizon), 'internal')
            local_kl = stats.kl_divergence(q_X, self.p_X)
            # Time-varying dynamics have one KL per timestep; sum them.
            if self.time_varying:
                global_kl = T.sum(
                    stats.kl_divergence(self.A_variational, self.A_prior))
            else:
                global_kl = stats.kl_divergence(self.A_variational,
                                                self.A_prior)
            prior_kl = T.mean(local_kl, axis=0) \
                + global_kl / T.to_float(num_data)
            A, Q = self.get_dynamics()
            model_stdev = T.sqrt(T.matrix_diag_part(Q))
            self.cache[(q_X, q_A)] = prior_kl, {
                'local-kl': local_kl,
                'global-kl': global_kl,
                'model-stdev': model_stdev,
            }
        else:
            # One-step prediction KL: push (x_t, a_t) through the dynamics
            # and compare with the posterior at t+1.
            q_Xt = q_X.__class__([
                q_X.get_parameters('regular')[0][:, :-1],
                q_X.get_parameters('regular')[1][:, :-1],
            ])
            q_At = q_A.__class__([
                q_A.get_parameters('regular')[0][:, :-1],
                q_A.get_parameters('regular')[1][:, :-1],
            ])
            p_Xt1 = self.forward(q_Xt, q_At)
            q_Xt1 = q_X.__class__([
                q_X.get_parameters('regular')[0][:, 1:],
                q_X.get_parameters('regular')[1][:, 1:],
            ])
            # NOTE(review): num_data is converted here and again below —
            # the second T.to_float is redundant but harmless.
            num_data = T.to_float(num_data)
            rmse = T.sqrt(
                T.sum(T.square(
                    q_Xt1.get_parameters('regular')[1]
                    - p_Xt1.get_parameters('regular')[1]),
                    axis=-1))
            A, Q = self.get_dynamics()
            model_stdev = T.sqrt(T.matrix_diag_part(Q))
            local_kl = T.sum(stats.kl_divergence(q_Xt1, p_Xt1), axis=1)
            if self.time_varying:
                global_kl = T.sum(
                    stats.kl_divergence(self.A_variational, self.A_prior))
            else:
                global_kl = stats.kl_divergence(self.A_variational,
                                                self.A_prior)
            self.cache[(q_X, q_A)] = (T.mean(local_kl, axis=0)
                                      + global_kl / T.to_float(num_data), {
                'rmse': rmse,
                'model-stdev': model_stdev,
                'local-kl': local_kl,
                'global-kl': global_kl
            })
    return self.cache[(q_X, q_A)]
def kl_divergence(self, q_X, q_A, num_data):
    """Batch-mean KL(q_X || N(0, I)); returns (kl, {}) with no diagnostics."""
    # Build a standard-normal prior shaped like q_X's mean parameter.
    shape = T.shape(q_X.get_parameters('regular')[1])
    prior = stats.GaussianScaleDiag([T.ones(shape), T.zeros(shape)])
    per_dim_kl = stats.kl_divergence(q_X, prior)
    # Sum over the last axis, then average over the batch.
    return T.mean(T.sum(per_dim_kl, -1), 0), {}
if __name__ == "__main__":
    args = parse_args()
    model_path = Path('results') / args.name / 'models' / 'model_latest.pkl'
    # Each load() presumably returns (train_df, test_df) — verify in load().
    dataset1 = load(args.round1, args.location1, args.board1)
    dataset2 = load(args.round2, args.location2, args.board2)
    # Join board1 ('-left' columns) with board2 and drop unmatched rows.
    train = dataset1[0].join(dataset2[0], lsuffix='-left').dropna()
    test = dataset1[1].join(dataset2[1], lsuffix='-left').dropna()
    model = joblib.load(model_path)
    # Recover board2's sensor network from the model's pickled architecture
    # (index [0] is the sensor_models dict — see the model __init__).
    # NOTE(review): pickle.loads on a saved artifact — safe only for
    # trusted, locally produced model files.
    fixer_model = pickle.loads(model.architecture)[0][args.board2]
    X = T.placeholder(T.floatx(), [None, 3])
    Y = T.placeholder(T.floatx(), [None, 3])
    Y_ = fixer_model(X)
    # Train the fixer to reproduce the saved model's representations.
    loss = T.mean((Y - Y_)**2)
    train_op = T.core.train.AdamOptimizer(1e-4).minimize(
        loss, var_list=fixer_model.get_parameters())
    # NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0 —
    # this script requires an old pandas (use .values on newer versions).
    X_data_train = train[[s + '-left' for s in sensor_features]].as_matrix()
    Y_data_train = model.representation(train[sensor_features], train['board'])
    X_data_test = test[[s + '-left' for s in sensor_features]].as_matrix()
    Y_data_test = model.representation(test[sensor_features], test['board'])
    sess = T.interactive_session()
    fit_nn(X_data_train, Y_data_train, fixer_model, batch_size=64)
    # Calibrate using the fixer's representations.
    train_preds = model.calibrate(sess.run(Y_, {X: X_data_train}),
                                  train[env_features])
    train_mae = abs(train_preds - train[Y_features])
    test_preds = model.calibrate(sess.run(Y_, {X: X_data_test}),
                                 test[env_features])
def initialize(self):
    """Build the full VAE-with-structured-prior graph: prior, cost model,
    encoders/decoders, ELBO, summaries, and the three optimizers
    (neural nets, cost model, prior dynamics)."""
    self.graph = T.core.Graph()
    with self.graph.as_default():
        # Instantiate prior and cost from their config dicts; pop the type
        # key so the remaining kwargs go straight to the constructor.
        prior_params = self.prior_params.copy()
        prior_type = prior_params.pop('prior_type')
        self.prior = PRIOR_MAP[prior_type](self.ds, self.da, self.horizon,
                                           **prior_params)
        cost_params = self.cost_params.copy()
        cost_type = cost_params.pop('cost_type')
        self.cost = COST_MAP[cost_type](self.ds, self.da, **cost_params)
        # Placeholders: observations O, controls U, costs C, plus direct
        # state/action inputs S, A (all [batch, time, dim]).
        self.O = T.placeholder(T.floatx(), [None, None, self.do])
        self.U = T.placeholder(T.floatx(), [None, None, self.du])
        self.C = T.placeholder(T.floatx(), [None, None])
        self.S = T.placeholder(T.floatx(), [None, None, self.ds])
        self.A = T.placeholder(T.floatx(), [None, None, self.da])
        self.t = T.placeholder(T.int32, [])
        self.state, self.action = \
            T.placeholder(T.floatx(), [None, self.ds]), \
            T.placeholder(T.floatx(), [None, self.da])
        if self.prior.has_dynamics():
            # Single-step rollout through the prior's dynamics.
            self.next_state = self.prior.next_state(self.state, self.action,
                                                    self.t)
            self.prior_dynamics = self.prior.get_dynamics()
        self.num_data = T.scalar()
        self.beta = T.placeholder(T.floatx(), [])
        self.learning_rate = T.placeholder(T.floatx(), [])
        self.model_learning_rate = T.placeholder(T.floatx(), [])
        # Per-timestep variational potentials from the encoders.
        self.S_potentials = util.map_network(self.state_encoder)(self.O)
        self.A_potentials = util.map_network(self.action_encoder)(self.U)
        if self.prior.is_dynamics_prior():
            self.data_strength = T.placeholder(T.floatx(), [])
            self.max_iter = T.placeholder(T.int32, [])
            posterior_dynamics, (encodings, actions) = \
                self.prior.posterior_dynamics(
                    self.S_potentials, self.A_potentials,
                    data_strength=self.data_strength,
                    max_iter=self.max_iter)
            self.posterior_dynamics_ = posterior_dynamics, (
                encodings.expected_value(), actions.expected_value())
        if self.prior.is_filtering_prior():
            self.prior_dynamics_stats = self.prior.sufficient_statistics()
            # Fed-in sufficient statistics of the dynamics
            # (second moments and a count vector).
            self.dynamics_stats = (
                T.placeholder(T.floatx(), [None, self.ds, self.ds]),
                T.placeholder(T.floatx(), [None, self.ds, self.ds + self.da]),
                T.placeholder(T.floatx(),
                              [None, self.ds + self.da, self.ds + self.da]),
                T.placeholder(T.floatx(), [None]),
            )
            S_natparam = self.S_potentials.get_parameters('natural')
            num_steps = T.shape(S_natparam)[1]
            # Zero-pad the potentials out to the full horizon so the
            # filtering prior can run on a fixed-length sequence.
            self.padded_S = stats.Gaussian(T.core.pad(
                self.S_potentials.get_parameters('natural'),
                [[0, 0], [0, self.horizon - num_steps], [0, 0], [0, 0]]
            ), 'natural')
            self.padded_A = stats.GaussianScaleDiag([
                T.core.pad(self.A_potentials.get_parameters('regular')[0],
                           [[0, 0], [0, self.horizon - num_steps], [0, 0]]),
                T.core.pad(self.A_potentials.get_parameters('regular')[1],
                           [[0, 0], [0, self.horizon - num_steps], [0, 0]])
            ], 'regular')
            self.q_S_padded, self.q_A_padded = self.prior.encode(
                self.padded_S, self.padded_A,
                dynamics_stats=self.dynamics_stats
            )
            # Truncate back to the observed number of steps.
            self.q_S_filter = self.q_S_padded.filter(max_steps=num_steps)
            self.q_A_filter = self.q_A_padded.__class__(
                self.q_A_padded.get_parameters('natural')[:, :num_steps]
                , 'natural')
            self.e_q_S_filter = self.q_S_filter.expected_value()
            self.e_q_A_filter = self.q_A_filter.expected_value()
        # Posterior, its KL against the prior, and gradients for the
        # prior's own (natural) parameters.
        (self.q_S, self.q_A), self.prior_kl, self.kl_grads, self.info = \
            self.prior.posterior_kl_grads(
                self.S_potentials, self.A_potentials, self.num_data
            )
        # Single-sample reconstructions for the ELBO.
        self.q_S_sample = self.q_S.sample()[0]
        self.q_A_sample = self.q_A.sample()[0]
        self.q_O = util.map_network(self.state_decoder)(self.q_S_sample)
        self.q_U = util.map_network(self.action_decoder)(self.q_A_sample)
        self.q_O_sample = self.q_O.sample()[0]
        self.q_U_sample = self.q_U.sample()[0]
        # Decoders applied to externally supplied states/actions.
        self.q_O_ = util.map_network(self.state_decoder)(self.S)
        self.q_U_ = util.map_network(self.action_decoder)(self.A)
        self.q_O__sample = self.q_O_.sample()[0]
        self.q_U__sample = self.q_U_.sample()[0]
        self.cost_likelihood = self.cost.log_likelihood(self.q_S_sample,
                                                        self.C)
        if self.cost.is_cost_function():
            self.evaluated_cost = self.cost.evaluate(self.S)
        self.log_likelihood = T.sum(self.q_O.log_likelihood(self.O), axis=1)
        # ELBO, and a beta-weighted variant used for training (beta-VAE style
        # annealing of the KL/cost terms).
        self.elbo = T.mean(self.log_likelihood + self.cost_likelihood
                           - self.prior_kl)
        train_elbo = T.mean(self.log_likelihood
                            + self.beta * (self.cost_likelihood
                                           - self.prior_kl))
        T.core.summary.scalar("encoder-stdev",
                              T.mean(self.S_potentials.get_parameters('regular')[0]))
        T.core.summary.scalar("log-likelihood", T.mean(self.log_likelihood))
        T.core.summary.scalar("cost-likelihood", T.mean(self.cost_likelihood))
        T.core.summary.scalar("prior-kl", T.mean(self.prior_kl))
        T.core.summary.scalar("beta", self.beta)
        T.core.summary.scalar("elbo", self.elbo)
        T.core.summary.scalar("beta-elbo", train_elbo)
        for k, v in self.info.items():
            T.core.summary.scalar(k, T.mean(v))
        self.summary = T.core.summary.merge_all()
        neural_params = (
            self.state_encoder.get_parameters()
            + self.state_decoder.get_parameters()
            + self.action_encoder.get_parameters()
            + self.action_decoder.get_parameters()
        )
        cost_params = self.cost.get_parameters()
        if len(neural_params) > 0:
            # Maximize train_elbo (minimize its negative) with clipped grads.
            optimizer = T.core.train.AdamOptimizer(self.learning_rate)
            gradients, variables = zip(*optimizer.compute_gradients(
                -train_elbo, var_list=neural_params))
            gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
            self.neural_op = optimizer.apply_gradients(
                zip(gradients, variables))
        else:
            self.neural_op = T.core.no_op()
        if len(cost_params) > 0:
            self.cost_op = T.core.train.AdamOptimizer(
                self.learning_rate).minimize(-self.elbo,
                                             var_list=cost_params)
        else:
            self.cost_op = T.core.no_op()
        if len(self.kl_grads) > 0:
            if self.prior.is_dynamics_prior():
                # opt = lambda x: T.core.train.MomentumOptimizer(x, 0.5)
                opt = lambda x: T.core.train.GradientDescentOptimizer(x)
            else:
                opt = T.core.train.AdamOptimizer
            # NOTE(review): pairs are swapped (b, a) — kl_grads apparently
            # stores (variable, gradient); apply_gradients wants (grad, var).
            self.dynamics_op = opt(self.model_learning_rate).apply_gradients([
                (b, a) for a, b in self.kl_grads
            ])
        else:
            self.dynamics_op = T.core.no_op()
        self.train_op = T.core.group(self.neural_op, self.dynamics_op,
                                     self.cost_op)
    # Session bound to the graph built above.
    self.session = T.interactive_session(graph=self.graph,
                                         allow_soft_placement=True,
                                         log_device_placement=False)
# NOTE(review): this excerpt begins mid-expression — the left-hand side of the
# natural-gradient computation is missing from this chunk (note the unmatched
# closing paren). The dangling fragment is preserved verbatim below.
q_w.get_parameters('natural')) / N
# One natural-gradient step on the weight posterior q_w.
next_w = Gaussian(q_w.get_parameters('natural') + lr * natural_gradient,
                  parameter_type='natural')
l_w = kl_divergence(q_w, p_w)[0]
# Bernoulli likelihood of labels under the stepped posterior's mean weights.
p_y = Bernoulli(T.sigmoid(T.einsum('jw,iw->ij', next_w.expected_value(), x)))
l_y = T.sum(p_y.log_likelihood(y[..., None]))
elbo = l_w + l_y
# Commit the natural-gradient step, and take an RMSProp step on -elbo.
nat_op = T.assign(q_w.get_parameters('natural'),
                  next_w.get_parameters('natural'))
grad_op = tf.train.RMSPropOptimizer(1e-4).minimize(-elbo)
train_op = tf.group(nat_op, grad_op)
sess = T.interactive_session()
# Threshold sigmoid at 0.5 by adding 0.5 and casting to int.
predictions = T.cast(
    T.sigmoid(T.einsum('jw,iw->i', q_w.expected_value(), T.to_float(X)))
    + 0.5, np.int32)
accuracy = T.mean(
    T.to_float(T.equal(predictions, T.constant(Y.astype(np.int32)))))

# NOTE(review): `iter` shadows the builtin; indentation of the prints below is
# reconstructed — confirm against the original script.
def iter(num_iter=1, b=100):
    for _ in range(num_iter):
        idx = np.random.permutation(N)[:b]
        sess.run(train_op, {x: X[idx], y: Y[idx]})
        print("%f" % (tuple(sess.run([elbo], {x: X, y: Y}))))
    # Compare the learned mean weights/accuracy against sklearn's
    # (coef_ and score_ come from outside this excerpt).
    print(sess.run(q_w.get_parameters('regular')[1][0]), coef_[0])
    print(sess.run(accuracy), score_)
def log_likelihood(self, batch, batch_z):
    """Mean Bernoulli log-likelihood of `batch` under the decoder.

    Decodes `batch_z` through p_network to pixel probabilities p, then
    evaluates x*log(p) + (1-x)*log(1-p) averaged over the batch.

    Bug fix: the original computed `batch * p` for the positive term —
    missing the log — while the negative term used T.log(1 - p), so the
    expression was not a log-likelihood. Both terms now take logs, with a
    small epsilon to guard log(0).
    """
    z = Vector(self.input_size, placeholder=batch_z, is_input=False)
    p = (z >> self.p_network).get_graph_outputs()[0]
    return T.mean(batch * T.log(p + 1e-10)
                  + (1 - batch) * T.log(1 - p + 1e-10))
def log_likelihood(self, batch_z, batch):
    """Mean Gaussian log-density of `batch_z` under the encoder q(z|x)."""
    # Encode the batch to get the variational mean and log-variance.
    x = Vector(self.input_size, placeholder=batch, is_input=False)
    mu, log_var = (x >> self.q_network).get_graph_outputs()
    # The network emits log-variance; convert to a standard deviation.
    stdev = T.sqrt(T.exp(log_var))
    density = log_normal(batch_z, mu, stdev, self.embedding_size, dim=2)
    return T.mean(density)