class Grad_policy(object):
    """Supervised policy network trained with a REINFORCE-style loss.

    A 3-layer MLP (state -> 512 -> 1024 -> action probabilities) is built
    inside the 'supervised_policy' variable scope.  The training loss is
    the scope's L2 regularization losses plus sum(-log pi(a|s) * reward),
    minimized with Adam.
    """

    def __init__(self, state_size, action_size, lr=0.001):
        self.init = xavier_initializer()
        with tf.variable_scope('supervised_policy'):
            self.st = tf.placeholder(tf.float32, [None, state_size], name='st')
            self.acts_prob = self.sl_policy_nn(
                self.st, state_size, action_size, self.init)
            self.act = tf.placeholder(tf.int32, [None], name='act')
            self.reward = tf.placeholder(tf.float32, name='reward')

            # Select the probability of each taken action with a boolean
            # one-hot mask over the softmax output.
            chosen_mask = tf.cast(
                tf.one_hot(self.act, depth=action_size), tf.bool)
            self.act_prob = tf.boolean_mask(self.acts_prob, chosen_mask)

            # Loss = scope L2 regularization + policy-gradient term.
            reg_losses = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES, scope='supervised_policy')
            pg_term = tf.reduce_sum(-tf.log(self.act_prob) * self.reward)
            self.loss = sum(reg_losses) + pg_term

            self.optimizer = AdamOptimizer(learning_rate=lr)
            self.training_op = self.optimizer.minimize(self.loss)

    def sl_policy_nn(self, state, state_size, action_size, init):
        """Build the MLP and return softmax action probabilities."""
        weights_1 = tf.get_variable('W1', [state_size, 512], initializer=init,
                                    regularizer=l2_regularizer(0.01))
        bias_1 = tf.get_variable('b1', [512],
                                 initializer=tf.constant_initializer(0.0))
        hidden_1 = tf.nn.relu(tf.matmul(state, weights_1) + bias_1)

        weights_2 = tf.get_variable('w2', [512, 1024], initializer=init,
                                    regularizer=l2_regularizer(0.01))
        bias_2 = tf.get_variable('b2', [1024],
                                 initializer=tf.constant_initializer(0.0))
        hidden_2 = tf.nn.relu(tf.matmul(hidden_1, weights_2) + bias_2)

        weights_3 = tf.get_variable('w3', [1024, action_size], initializer=init,
                                    regularizer=l2_regularizer(0.01))
        bias_3 = tf.get_variable('b3', [action_size],
                                 initializer=tf.constant_initializer(0.0))
        return tf.nn.softmax(tf.matmul(hidden_2, weights_3) + bias_3)

    def get_act_probs(self, st, sess=None):
        """Evaluate the action-probability tensor for a batch of states."""
        if sess is None:
            sess = tf.get_default_session()
        return sess.run(self.acts_prob, {self.st: st})

    def train_batch(self, st, act, reward, sess=None):
        """Run one optimization step and return the resulting loss value."""
        if sess is None:
            sess = tf.get_default_session()
        feed = {self.st: st, self.act: act, self.reward: reward}
        _, loss_value = sess.run([self.training_op, self.loss], feed)
        return loss_value
def train():
    """Train the image classifier over the labelled training set.

    Makes one shuffled pass over the classification data, applying a
    single Adam step per image, and checkpoints the weights every 1000
    images and once more at the end.
    """
    classifier = get_model()
    opt = AdamOptimizer(1e-5)
    images_data = get_classification_data("../data/data_classification_train.json")
    count = 0
    print("Training started")
    shuffle(images_data)
    for (i, label) in images_data:
        img = get_img("../pictures/pictures_classification_train/{}.png".format(i))

        def get_loss():
            # Cross-entropy of the classifier logits vs. the label; the
            # logits and loss are also recorded via save_data.
            img_vector = tf.convert_to_tensor([img], dtype=np.float32)
            logits = classifier(img_vector)
            entropy = sparse_softmax_cross_entropy_with_logits(
                labels=[label], logits=logits)
            entropy = tf.gather(entropy, 0)
            save_data(label, logits[0].numpy().tolist(),
                      entropy.numpy().tolist())
            return entropy

        opt.minimize(get_loss)
        count += 1
        if count % 1000 == 0:
            classifier.save_weights(weights_path)
            print("Weights saved")
    classifier.save_weights(weights_path)
    print("Weights saved")
# Build the LKM model and partition trainable variables: everything in
# the "gp_hyperparameters" scope is GP-side, the rest is IBP-side.
model = LKM(data, additive_kernels, likelihoods)
gp_train_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                  scope="gp_hyperparameters")
ibp_train_vars = list(set(tf.global_variables()) - set(gp_train_vars))

update_tau = model.closed_form_update_tau()
elbo = model.build_marginal_loglikelihood()
z, nll_gp_refined = model.refine()
t_test, K, K_star, K_star_star, noise = model.prepare_for_postprocess()

# IBP parameters: maximize the ELBO (minimize its negation) with Adam.
adam = AdamOptimizer(0.01)
train_ibp = adam.minimize(-elbo, var_list=ibp_train_vars)

# GP hyperparameters: short L-BFGS-B runs on the negative ELBO.
train_gp = ScipyOptimizerInterface(-elbo,
                                   var_list=gp_train_vars,
                                   method='L-BFGS-B',
                                   options={"maxiter": 10})

# Refinement stage: a longer L-BFGS-B run on the refined GP NLL.
train_gp_refine = ScipyOptimizerInterface(nll_gp_refined,
                                          var_list=gp_train_vars,
                                          method='L-BFGS-B',
                                          options={"maxiter": 300})

init = tf.global_variables_initializer()