# NOTE(review): this is a FRAGMENT — it begins inside the body of a loop whose
# header (presumably `for j in range(...):`) is outside the visible chunk, and it
# relies on names defined elsewhere (test_vs, test_ics, L, R, np_t, t, model,
# i_normalizer, v_normalizer, np_norm_t, sampled_outputs, predictions, titles,
# PlotValidator). Comments below describe only what is visible here.

# --- per-test-case body (runs once per index j) ---
# Pick the j-th test condition: source voltage and initial current.
test_v = test_vs[j]
test_ic = test_ics[j]
# Reference solution: integrate the series-RL ODE  di/dt = v/L - (R/L)*i
# numerically over the time grid np_t (odeint is presumably scipy.integrate.odeint
# — confirm against the file's imports).
np_i = odeint(lambda i_t, time_t: (1 / L) * test_v - (R / L) * i_t, test_ic, np_t)
sampled_outputs.append(np_i)
# PINN testing
# Broadcast the scalar test condition across the whole time grid so the model
# receives one (v, i0) pair per time sample.
np_ic = np.full((len(t), ), test_ic)
np_v = np.full((len(t), ), test_v)
np_norm_ic = i_normalizer.normalize(np_ic)
np_norm_v = v_normalizer.normalize(np_v)
# Test with normalized data: predict in normalized space, then map the current
# back to physical units for comparison with the odeint reference.
np_norm_prediction = model.predict(np_norm_t, np_norm_v, np_norm_ic)
np_prediction = i_normalizer.denormalize(np_norm_prediction)
# Test with denormalized data
# np_prediction = model.predict(np_t, np_v, np_ic)
predictions.append(np_prediction)
# Human-readable subplot title, e.g. "i0 = 0.123 A, v = 5 V".
title = 'i0 = ' + str(round(test_ic, 3)) + ' A, v = ' + str(test_v) + ' V'
titles.append(title)

# Results
# NOTE(review): the plotting calls presumably sit AFTER the loop (they consume
# the accumulated lists) — confirm original indentation.
plotter = PlotValidator()
plotter.multicompare([np_t], sampled_outputs, predictions, titles)
plotter.plot_validation_loss(model)
class Planner(object):
    """Seq2seq LSTM subgoal planner (TensorFlow 1.x, tf.contrib APIs).

    Encodes a source state, then decodes a fixed-length sequence of subgoals
    conditioned on the destination goal.  Training uses teacher forcing
    (TrainingHelper); inference reuses the same decoder weights with a
    ContinousInferHelper.  Gradients are applied through an MPI-synchronized
    Adam optimizer (MpiAdam), and goal statistics are tracked by a Normalizer.

    NOTE(review): relies on project-local helpers (U, Normalizer,
    PlanReplayBuffer, ContinousInferHelper, MpiAdam, store_args) whose
    definitions are outside this chunk.
    """

    @store_args  # store_args presumably copies every ctor argument onto self (e.g. self.scope, self.sample_func) — the body reads them that way
    def __init__(self, inp_dim, hid_size, seq_len, out_dim, buffer_size, batch_size=64, optim_stepsize=1e-3, sample_func=None, norm_eps=1e-2, norm_clip=5, scope='planner', layerNorm=False, **kwargs):
        '''
        Implementation of LSTM Planner that produces given number of subgoals between src and dest.

        Args:
            inp_dim : dimension for the LSTM
            hid_size : cell_state_size
            seq_len : max_timesteps
            out_dim : dimension for LSTM output
        '''
        # self.main = lstm(hid_size, layerNorm)
        self.adamepsilon = 1e-6
        self.mode = tf.contrib.learn.ModeKeys.TRAIN  # TRAIN for training, INFER for prediction, EVAL for evaluation
        # Sentinel: save()/load() use this to tell whether the graph was built.
        self.infer_outputs = None
        # Build the whole TF graph under this planner's variable scope so that
        # _vars('') / _global_vars('') can collect exactly our variables.
        with tf.variable_scope(self.scope):
            self._create_network()
        buffer_shape = [seq_len + 2, out_dim]  # plus 2: the [0] is 'src', [1] is 'dest', [2:] are 'labels',
        if self.sample_func is None:
            # Deferred import — avoids a hard dependency unless the default sampler is needed.
            from sampler import make_sample_plans
            self.sample_func = make_sample_plans()
        self.buffer = PlanReplayBuffer(buffer_shape, buffer_size, self.sample_func)

    def _create_network(self):
        """Build placeholders, normalizers, encoder/decoder graph, loss and optimizer.

        Must run inside `tf.variable_scope(self.scope)` (see __init__).
        """
        self.sess = U.get_session()
        # Placeholders: src/dest are single-step sequences [batch, 1, dim];
        # labels are the full subgoal sequence [batch, seq_len, out_dim].
        self.inp_src = tf.placeholder(shape=[None, 1, self.inp_dim], dtype=tf.float32, name='input_src')
        self.inp_dest = tf.placeholder(shape=[None, 1, self.out_dim], dtype=tf.float32, name='input_dest')
        self.labels = tf.placeholder(shape=[None, self.seq_len, self.out_dim], dtype=tf.float32, name='label')
        # Per-example sequence lengths fed at run time (see train()/plan()).
        self.src_seq_len = tf.placeholder(tf.int32, (None, ), name='source_sequence_length')
        self.tar_seq_len = tf.placeholder(tf.int32, (None, ), name='target_sequence_length')

        # running averages
        # with tf.variable_scope('goal_stats_src'):
        #     self.goal_stats_src = Normalizer(self.inp_dim, self.norm_eps, self.norm_clip, sess=self.sess)
        with tf.variable_scope('goal_stats_dest'):
            self.goal_stats_dest = Normalizer(self.out_dim, self.norm_eps, self.norm_clip, sess=self.sess, PLN=True)

        # normalize inp_src, and goals labels
        # NOTE(review): src is normalized with the DEST statistics — presumably
        # src and goals share the same space; confirm against Normalizer usage.
        inp_src = self.goal_stats_dest.normalize(self.inp_src)
        inp_dest = self.goal_stats_dest.normalize(self.inp_dest)
        goal_labels = self.goal_stats_dest.normalize(self.labels)

        with tf.variable_scope('goal_gen'):
            # Encoder: consume the (length-1) source sequence; its final state
            # seeds both decoders below.
            encoder_cell = tf.nn.rnn_cell.LSTMCell(self.hid_size)
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(encoder_cell, inp_src, sequence_length=self.src_seq_len, dtype=tf.float32)

            decoder_cell = tf.nn.rnn_cell.LSTMCell(self.hid_size)
            project_layer = tf.layers.Dense(self.out_dim)  # maps hid_size -> out_dim

            with tf.variable_scope("decode"):
                # Teacher forcing: first decoder input is the (normalized) dest,
                # then the ground-truth labels shifted right by one step.
                train_inp = tf.concat([inp_dest, goal_labels[:, :-1, :]], axis=-2)
                train_helper = tf.contrib.seq2seq.TrainingHelper(train_inp, sequence_length=self.tar_seq_len)
                train_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, train_helper, encoder_state, output_layer=project_layer)
                train_outputs, _, final_seq_len = tf.contrib.seq2seq.dynamic_decode(train_decoder, maximum_iterations=self.seq_len)
                self.train_outputs = train_outputs.rnn_output

            # reuse=True: inference decoder shares the training decoder's weights.
            with tf.variable_scope("decode", reuse=True):
                # Autoregressive decoding seeded with dest; each output is fed
                # back as the next input (ContinousInferHelper, project-local).
                infer_helper = ContinousInferHelper(inp_dest[:, 0, :], self.tar_seq_len)
                infer_decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell, infer_helper, encoder_state, output_layer=project_layer)
                infer_outputs, _, final_seq_len = tf.contrib.seq2seq.dynamic_decode(infer_decoder, maximum_iterations=self.seq_len)
                self.infer_outputs = self.goal_stats_dest.denormalize(infer_outputs.rnn_output)

            # Gaussian negative log-likelihood with a learned, state-independent
            # per-dimension log-stddev ("logstd").
            log_sigma = tf.get_variable(name="logstd", shape=[1, self.out_dim], initializer=U.normc_initializer(0.1))
            goals = train_outputs.rnn_output
            loss = 0.5 * tf.reduce_sum(tf.square((goal_labels - goals)/tf.exp(log_sigma)), axis=-1) \
                + 0.5 * np.log(2*np.pi) * tf.to_float(tf.shape(self.labels)[-1]) \
                + tf.reduce_sum(log_sigma, axis=-1)
            self.loss = tf.reduce_mean(loss)

        self.tr_outputs = self.goal_stats_dest.denormalize(self.train_outputs)  # just for inspect the correctness of training

        var_list = self._vars('')
        self.grads = U.flatgrad(self.loss, var_list)
        self.adam = MpiAdam(var_list, epsilon=self.adamepsilon)

        tf.variables_initializer(self._global_vars('')).run()
        self.adam.sync()  # broadcast initial parameters across MPI workers

    def train(self, use_buffer=False, justEval=False, **kwargs):
        """Run one forward/backward pass; apply the gradient unless justEval.

        Data comes either from kwargs ('src', 'dest', 'lbl') or, when
        use_buffer=True, from a sampled replay-buffer batch laid out as
        [:, 0]=src, [:, 1]=dest, [:, 2:]=labels (see buffer_shape in __init__).

        Returns:
            (loss, last train-mode subgoal sequence, last infer-mode subgoal
            sequence) — the latter two are for inspection only.
        """
        self.mode = tf.contrib.learn.ModeKeys.TRAIN
        if not use_buffer:
            src = np.reshape(kwargs['src'], [-1, 1, self.inp_dim])
            dest = np.reshape(kwargs['dest'], [-1, 1, self.out_dim])
            lbl = kwargs['lbl']
        else:
            episode_batch = self.buffer.sample(self.batch_size)
            src = np.reshape(episode_batch[:, 0, :], [-1, 1, self.inp_dim])
            lbl = episode_batch[:, 2:, :]
            dest = np.reshape(episode_batch[:, 1, :], [-1, 1, self.out_dim])
        # Source sequences are always length 1; targets always full length.
        src_seq_len = [1] * src.shape[0]
        tar_seq_len = [self.seq_len] * dest.shape[0]
        # compute grads
        loss, g, tr_sub_goals, te_sub_goals = self.sess.run(
            [self.loss, self.grads, self.tr_outputs, self.infer_outputs],
            feed_dict={
                self.inp_src: src,
                self.inp_dest: dest,
                self.labels: lbl,
                self.src_seq_len: src_seq_len,
                self.tar_seq_len: tar_seq_len
            })
        if not justEval:
            self.adam.update(g, stepsize=self.optim_stepsize)
        return loss, tr_sub_goals[-1], te_sub_goals[-1]

    def plan(self, src, dest):
        """Generate subgoals from src toward dest (inference decoder).

        Returns an array of shape [batch, seq_len + 1, out_dim]: the decoded
        subgoals reversed along the time axis, with dest appended last.
        """
        src = np.reshape(src, [-1, 1, self.inp_dim])
        dest = np.reshape(dest, [-1, 1, self.out_dim])
        src_seq_len = [1] * src.shape[0]
        tar_seq_len = [self.seq_len] * dest.shape[0]
        plan_goals = self.sess.run(self.infer_outputs,
                                   feed_dict={
                                       self.inp_src: src,
                                       self.inp_dest: dest,
                                       self.src_seq_len: src_seq_len,
                                       self.tar_seq_len: tar_seq_len
                                   })
        assert plan_goals.shape[0] == src.shape[0] and plan_goals.shape[1] == self.seq_len
        # NOTE(review): the decoder presumably emits goals dest-to-src, hence
        # the flip before appending the final goal — confirm with the sampler.
        plan_goals = np.flip(plan_goals, axis=-2)
        plan_goals = np.concatenate([plan_goals, dest], axis=-2)  # append the ultimate goal
        return plan_goals

    def store_episode(self, episode_batch, update_stats=True):
        """
        episode_batch : [batch_size * (subgoal_num+1) * subgoal_dim]
        """
        isNull = episode_batch.shape[0] < 1
        if not isNull:
            self.buffer.store_episode(episode_batch)
            # logger.info("buffer store_episode done. updating statistics.")
        if update_stats:
            # Columns 1: are dest + labels; column 0 (src) is excluded from the
            # goal statistics.  isNull lets the Normalizer skip empty batches.
            subgoals = episode_batch[:, 1:, :]
            self.goal_stats_dest.update(subgoals, isNull=isNull)
            # logger.info("ready to recompute_stats")
            # print(subgoals)
            self.goal_stats_dest.recompute_stats(inc=episode_batch.shape[0])

    def update_normalizer_stats(self, batch):
        """Fold a batch's 'dest' goals into the running normalizer statistics."""
        # self.goal_stats_src.update(batch['src'])
        self.goal_stats_dest.update(batch['dest'])
        # self.goal_stats_src.recompute_stats()
        self.goal_stats_dest.recompute_stats()

    def _vars(self, scope):
        """Trainable variables under self.scope/scope; asserts some exist."""
        res = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=self.scope + '/' + scope)
        assert len(res) > 0
        return res

    def _global_vars(self, scope):
        """All global variables under self.scope/scope (may be empty)."""
        res = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.scope + '/' + scope)
        return res

    def save(self, save_path):
        """Save all of this planner's global variables to save_path."""
        assert self.infer_outputs is not None  # graph must be built first
        var_list = self._global_vars('')
        U.save_variables(save_path, variables=var_list, sess=self.sess)

    def load(self, load_path):
        """Restore planner variables from load_path, building the graph if needed.

        NOTE(review): unlike __init__, the rebuild here is NOT wrapped in
        `tf.variable_scope(self.scope)`, and no session is passed to
        U.load_variables — verify both against U's implementation.
        """
        if self.infer_outputs is None:
            self._create_network()
        var_list = self._global_vars('')
        U.load_variables(load_path, variables=var_list)

    def logs(self, prefix=''):
        """Return (key, value) diagnostics: buffer size and goal mean/std."""
        logs = []
        logs += [('subgoals/buff_size', self.buffer.get_current_episode_size())]
        logs += [('goals/mean', np.mean(self.sess.run([self.goal_stats_dest.mean])))]
        logs += [('goals/std', np.mean(self.sess.run([self.goal_stats_dest.std])))]

        if prefix != '':
            prefix = prefix.strip('/')
            return [(prefix + '/' + key, val) for key, val in logs]
        else:
            return logs