def __init__(self, env_fns, spaces=None, context='spawn'):
    """
    If you don't specify observation_space, we'll have to create a dummy
    environment to get it.
    """
    ctx = mp.get_context(context)
    if spaces:
        observation_space, action_space = spaces
    else:
        logger.log('Creating dummy env object to get spaces')
        with logger.scoped_configure(format_strs=[]):
            dummy = env_fns[0]()
            observation_space, action_space = dummy.observation_space, dummy.action_space
            dummy.close()
            del dummy

    VecEnv.__init__(self, len(env_fns), observation_space, action_space)
    self.obs_keys, self.obs_shapes, self.obs_dtypes = obs_space_info(observation_space)
    self.obs_bufs = [
        {k: ctx.Array(_NP_TO_CT[self.obs_dtypes[k].type], int(np.prod(self.obs_shapes[k])))
         for k in self.obs_keys}
        for _ in env_fns]

    self.parent_pipes = []
    self.procs = []
    with clear_mpi_env_vars():
        for env_fn, obs_buf in zip(env_fns, self.obs_bufs):
            wrapped_fn = CloudpickleWrapper(env_fn)
            parent_pipe, child_pipe = ctx.Pipe()
            proc = ctx.Process(target=_subproc_worker,
                               args=(child_pipe, parent_pipe, wrapped_fn, obs_buf,
                                     self.obs_shapes, self.obs_dtypes, self.obs_keys))
            proc.daemon = True
            self.procs.append(proc)
            self.parent_pipes.append(parent_pipe)
            proc.start()
            child_pipe.close()
    self.waiting_step = False
    self.viewer = None
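
# Example (illustrative sketch, not part of the original code): this constructor
# follows the shared-memory vectorized-env pattern from OpenAI baselines, so it
# would typically be driven as below. The class name `ShmemVecEnv` and the
# environment id are assumptions used only for illustration.
def _example_make_vec_env():
    import gym
    env_fns = [lambda: gym.make('HandManipulateEgg-v0') for _ in range(4)]
    vec_env = ShmemVecEnv(env_fns, context='spawn')
    obs = vec_env.reset()
    actions = [vec_env.action_space.sample() for _ in range(vec_env.num_envs)]
    obs, rewards, dones, infos = vec_env.step(actions)
    vec_env.close()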
def split_data(paths, n):
    episode_size = paths.get_current_episode_size()
    logger.log("Collected episode size is ", episode_size)
    index = np.arange(episode_size)
    np.random.shuffle(index)
    train_index = index[:int(0.8 * episode_size)]
    test_index = index[int(0.8 * episode_size):]

    train_dict, test_dict = dict([]), dict([])
    for key in paths.buffers:
        data = paths.buffers[key]
        train_data, test_data = data[train_index], data[test_index]
        train_dict[key] = train_data
        test_dict[key] = test_data

    train_lst, test_lst = [], []
    for i in range(n):
        train, test = dict([]), dict([])
        for key in paths.buffers:
            train_data, test_data = train_dict[key], test_dict[key]
            # take the i-th chunk of 100 samples for this split
            # (the original indexed with int(i / n), which is 0 for all i < n
            # and therefore returned the same chunk n times)
            train[key] = train_data[i * 100:(i + 1) * 100]
            test[key] = test_data[i * 100:(i + 1) * 100]
        train_lst.append(train)
        test_lst.append(test)
    return train_lst, test_lst
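
# Example (illustrative sketch, not part of the original code): split_data
# expects a replay-buffer-like object exposing `get_current_episode_size()` and
# a `buffers` dict of per-key arrays, and returns n train/test dicts of
# 100-sample chunks each. The `paths` argument here is a placeholder.
def _example_split_data(paths, n=5):
    train_splits, test_splits = split_data(paths, n)
    for i, (train, test) in enumerate(zip(train_splits, test_splits)):
        key = next(iter(train))
        logger.log("split %d: %d train / %d test samples"
                   % (i, len(train[key]), len(test[key])))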
def fit(self, obs, act, obs_next, epochs=1000, compute_normalization=True,
        verbose=False, valid_split_ratio=None, rolling_average_persitency=None,
        log_tabular=False, early_stopping=True):
    """
    Fits the NN dynamics model

    :param obs: observations - numpy array of shape (n_samples, ndim_obs)
    :param act: actions - numpy array of shape (n_samples, ndim_act)
    :param obs_next: observations after taking action - numpy array of shape (n_samples, ndim_obs)
    :param epochs: number of training epochs
    :param compute_normalization: boolean indicating whether normalization shall be (re-)computed given the data
    :param valid_split_ratio: relative size of validation split (float between 0.0 and 1.0)
    :param verbose: logging verbosity
    """
    assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
    assert obs_next.ndim == 2 and obs_next.shape[1] == self.obs_space_dims
    assert act.ndim == 2 and act.shape[1] == self.action_space_dims

    if valid_split_ratio is None:
        valid_split_ratio = self.valid_split_ratio
    if rolling_average_persitency is None:
        rolling_average_persitency = self.rolling_average_persitency

    assert 1 > valid_split_ratio >= 0

    sess = tf.get_default_session()

    # split into train and validation set
    delta = obs_next - obs
    obs_train, act_train, delta_train, obs_test, act_test, delta_test = train_test_split(
        obs, act, delta, test_split_ratio=valid_split_ratio)

    if self._dataset_test is None:
        self._dataset_test = dict(obs=obs_test, act=act_test, delta=delta_test)
        self._dataset_train = dict(obs=obs_train, act=act_train, delta=delta_train)
    else:
        n_test_new_samples = len(obs_test)
        n_max_test = self.buffer_size - n_test_new_samples
        n_train_new_samples = len(obs_train)
        n_max_train = self.buffer_size - n_train_new_samples

        self._dataset_test['obs'] = np.concatenate(
            [self._dataset_test['obs'][-n_max_test:], obs_test])
        self._dataset_test['act'] = np.concatenate(
            [self._dataset_test['act'][-n_max_test:], act_test])
        self._dataset_test['delta'] = np.concatenate(
            [self._dataset_test['delta'][-n_max_test:], delta_test])

        self._dataset_train['obs'] = np.concatenate(
            [self._dataset_train['obs'][-n_max_train:], obs_train])
        self._dataset_train['act'] = np.concatenate(
            [self._dataset_train['act'][-n_max_train:], act_train])
        self._dataset_train['delta'] = np.concatenate(
            [self._dataset_train['delta'][-n_max_train:], delta_train])

    # create data queue
    if self.next_batch is None:
        self.next_batch, self.iterator = self._data_input_fn(
            self._dataset_train['obs'], self._dataset_train['act'],
            self._dataset_train['delta'], batch_size=self.batch_size,
            buffer_size=self.buffer_size)

    valid_loss_rolling_average = None

    if (self.normalization is None or compute_normalization) and self.normalize_input:
        self.compute_normalization(self._dataset_train['obs'],
                                   self._dataset_train['act'],
                                   self._dataset_train['delta'])

    if self.normalize_input:
        # normalize data
        obs_train, act_train, delta_train = self._normalize_data(
            self._dataset_train['obs'], self._dataset_train['act'],
            self._dataset_train['delta'])
        assert obs_train.ndim == act_train.ndim == delta_train.ndim == 2
    else:
        obs_train = self._dataset_train['obs']
        act_train = self._dataset_train['act']
        delta_train = self._dataset_train['delta']

    # Training loop
    for epoch in range(epochs):

        # initialize data queue
        sess.run(self.iterator.initializer,
                 feed_dict={self.obs_dataset_ph: obs_train,
                            self.act_dataset_ph: act_train,
                            self.delta_dataset_ph: delta_train})

        batch_losses = []
        while True:
            try:
                obs_batch, act_batch, delta_batch = sess.run(self.next_batch)

                # run train op
                batch_loss, _ = sess.run([self.loss, self.train_op],
                                         feed_dict={self.obs_ph: obs_batch,
                                                    self.act_ph: act_batch,
                                                    self.delta_ph: delta_batch})
                batch_losses.append(batch_loss)

            except tf.errors.OutOfRangeError:
                # compute validation loss
                if self.normalize_input:
                    # normalize data
                    obs_test, act_test, delta_test = self._normalize_data(
                        self._dataset_test['obs'], self._dataset_test['act'],
                        self._dataset_test['delta'])
                    assert obs_test.ndim == act_test.ndim == delta_test.ndim == 2
                else:
                    obs_test = self._dataset_test['obs']
                    act_test = self._dataset_test['act']
                    delta_test = self._dataset_test['delta']

                valid_loss = sess.run(self.loss,
                                      feed_dict={self.obs_ph: obs_test,
                                                 self.act_ph: act_test,
                                                 self.delta_ph: delta_test})

                if valid_loss_rolling_average is None:
                    # set initial rolling average to a higher value to avoid stopping too early
                    valid_loss_rolling_average = 1.5 * valid_loss
                    valid_loss_rolling_average_prev = 2.0 * valid_loss

                valid_loss_rolling_average = rolling_average_persitency * valid_loss_rolling_average + \
                                             (1.0 - rolling_average_persitency) * valid_loss

                if verbose:
                    logger.log(
                        "Training NNDynamicsModel - finished epoch %i -- "
                        "train loss: %.4f  valid loss: %.4f  valid_loss_mov_avg: %.4f"
                        % (epoch, float(np.mean(batch_losses)), valid_loss,
                           valid_loss_rolling_average))
                break

        if early_stopping and valid_loss_rolling_average_prev < valid_loss_rolling_average:
            logger.log('Stopping DynamicsEnsemble training since valid_loss_rolling_average increased')
            break
        valid_loss_rolling_average_prev = valid_loss_rolling_average
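
# Example (illustrative sketch): fitting the dynamics model on a batch of
# transitions with early stopping on the validation-loss rolling average.
# `dynamics_model` stands for an already-constructed instance of this class and
# the arrays are placeholders; shapes follow the docstring above.
def _example_fit_dynamics(dynamics_model, obs, act, obs_next):
    # obs, obs_next: (n_samples, ndim_obs); act: (n_samples, ndim_act)
    dynamics_model.fit(obs, act, obs_next,
                       epochs=50,
                       compute_normalization=True,
                       valid_split_ratio=0.2,
                       rolling_average_persitency=0.99,
                       verbose=True,
                       early_stopping=True)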
def main(**kwargs):
    z_dim = kwargs['z_dim']
    trans_mode = kwargs['trans_mode']
    epochs = kwargs['epochs']
    include_action = kwargs['include_action']
    label = kwargs['label']
    dataset = kwargs['data_path']
    feature_dims = kwargs['feature_dims']
    mode = kwargs['mode']
    n = kwargs['n']
    k = kwargs['k']
    encoder_lr = kwargs['encoder_lr']
    decoder_lr = kwargs['decoder_lr']
    decoder_feature_dims = kwargs['decoder_feature_dims']
    process_type = kwargs['process_type']

    if kwargs['data_path'] == '../dataset/sequence/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'trained_5seeds'
    elif kwargs['data_path'] == '../dataset/untrained/HandManipulateEgg-v0/5seeds-dict.pickle':
        kwargs['dataset'] = 'untrained_5seeds'
    elif kwargs['data_path'] == '../dataset/HandManipulateEgg-v09-dict.pickle':
        kwargs['dataset'] = 'trained_1seed'

    exp_dir = os.getcwd() + '/data/' + EXP_NAME + '/' + str(kwargs['seed'])
    if kwargs['debug']:
        save_dir = '../saved_cpc/' + str(label) + '/' + str(kwargs['normalize_data']) + '/' + str(process_type) + '/trained/debug'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type) + '/trained/debug'
    else:
        save_dir = '../saved_cpc/' + str(label) + '/' + str(kwargs['normalize_data']) + '/' + str(process_type) + '/trained'
        # save_dir = '../saved_cpc/' + str(label) + '/' + str(process_type) + '/trained'

    logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'], snapshot_mode='last')
    json.dump(kwargs, open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True, cls=ClassEncoder)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = kwargs.get('gpu_frac', 0.95)
    sess = tf.Session(config=config)

    obs, acts, fixed_num_of_contact = pickle.load(open(dataset, 'rb'))

    env = gym.make(kwargs['env'],
                   obs_type=kwargs['obs_type'],
                   fixed_num_of_contact=[fixed_num_of_contact, True])
    ngeoms = env.sim.model.ngeom
    obs, object_info = expand_data(obs, ngeoms, fixed_num_of_contact)
    if kwargs['normalize_data']:
        obs = normalize_obs(obs)

    next_obs = obs[:, 1:]
    obs = obs[:, :-1]

    N, L, _, contact_point_dim = obs.shape
    N, L, action_dim = acts.shape
    obs_dim = (fixed_num_of_contact, contact_point_dim)

    train_data, test_data = split_data([obs, acts, next_obs, object_info])
    batch_size = 2

    if mode in ['restore', 'store_weights']:
        saver = tf.train.import_meta_graph(save_dir + '-999.meta')
        pur_save_dir = save_dir[:-8]
        saver.restore(sess, tf.train.latest_checkpoint(pur_save_dir))
        graph = tf.get_default_graph()

    with sess.as_default() as sess:
        encoder = Encoder(z_dim, fixed_num_of_contact, contact_point_dim, feature_dims)
        trans = Transition(z_dim, action_dim, mode=trans_mode)
        cpc = CPC(sess, encoder, trans, encoder_lr, fixed_num_of_contact,
                  contact_point_dim, action_dim,
                  include_action=include_action,
                  type=1 * (label == 'cpc1') + 2 * (label == 'cpc2'),
                  n_neg=n,
                  process_type=process_type,
                  mode=mode)
        cpc_epochs, decoder_epochs = epochs

        if mode == 'train':
            sess.run(tf.global_variables_initializer())
            logger.log("training started")
            for epoch in range(cpc_epochs):
                # train_cpc(cpc, epoch, train_data, batch_size, n, k)
                test_cpc(cpc, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            cpc.save_model(save_dir, 999)

            """decoder"""
            logger.log("Done with cpc training.")
            decoder = Decoder(cpc, sess, z_dim, decoder_feature_dims,
                              fixed_num_of_contact, contact_point_dim, decoder_lr)
            uninit_vars = [var for var in tf.global_variables()
                           if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))

            for epoch in range(decoder_epochs):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", (epoch + cpc_epochs))
                logger.dumpkvs()
            print("model saved in", save_dir)

        elif mode == 'restore':
            decoder = Decoder(cpc, sess, z_dim, decoder_feature_dims,
                              fixed_num_of_contact, contact_point_dim, decoder_lr)
            uninit_vars = [var for var in tf.global_variables()
                           if not sess.run(tf.is_variable_initialized(var))]
            sess.run(tf.variables_initializer(uninit_vars))
            print("initialized")
            for epoch in range(100):
                train_decoder(decoder, epoch, train_data, batch_size, n, k)
                test_decoder(decoder, epoch, test_data, batch_size, n, k)

                logger.logkv("epoch", epoch)
                logger.dumpkvs()
            print("logging to", exp_dir)

        elif mode == 'store_weights':
            old = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='')
            old = sess.run(old)
            save_dir = './saved_model/' + str(label) + '/' + str(process_type) + '/trained/'
            with open(save_dir + 'weights.pickle', 'wb') as pickle_file:
                pickle.dump(old, pickle_file)
            print("weights saved to", save_dir)

            save_dir = '/home/vioichigo/try/tactile-baselines/saved_model/cpc2/trained'
            with open(save_dir + 'params.pickle', 'wb') as pickle_file:
                pickle.dump([z_dim, fixed_num_of_contact, contact_point_dim, action_dim,
                             encoder_lr, feature_dims, trans_mode, label, include_action],
                            pickle_file)

    tf.reset_default_graph()
    print("graph reset successfully")
def save_model(self, model_dir, i=999):
    saver = tf.train.Saver()
    saver.save(self.sess, model_dir, global_step=i)
    logger.log("saved successfully")
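
# Example (illustrative sketch): restoring a checkpoint written by save_model
# above. `model_dir` and `step` are placeholders and must match the arguments
# passed to save_model; this mirrors the restore path used in main().
def _example_restore_model(sess, model_dir, step=999):
    saver = tf.train.import_meta_graph(model_dir + '-%d.meta' % step)
    saver.restore(sess, model_dir + '-%d' % step)
    logger.log("restored from %s-%d" % (model_dir, step))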
def train(self):
    """
    Trains policy on env using algo

    Pseudocode:
        for itr in n_itr:
            for step in num_inner_grad_steps:
                sampler.sample()
                algo.compute_updated_dists()
            algo.optimize_policy()
            sampler.update_goals()
    """
    with self.sess.as_default() as sess:
        # initialize uninitialized vars (only initialize vars that were not loaded)
        sess.run(tf.global_variables_initializer())
        start_time = time.time()

        if self.start_itr == 0:
            self.algo._update_target(tau=1.0)
            if self.n_initial_exploration_steps > 0:
                while self.replay_buffer._size < self.n_initial_exploration_steps:
                    paths = self.sampler.obtain_samples(log=True, log_prefix='train-', random=True)
                    samples_data = self.sample_processor.process_samples(
                        paths, log='all', log_prefix='train-')[0]
                    self.replay_buffer.add_samples(
                        samples_data['observations'],
                        samples_data['actions'],
                        samples_data['rewards'],
                        samples_data['dones'],
                        samples_data['next_observations'],
                    )

        for itr in range(self.start_itr, self.n_itr):
            itr_start_time = time.time()
            logger.log("\n ---------------- Iteration %d ----------------" % itr)
            logger.log("Sampling set of tasks/goals for this meta-batch...")

            """ -------------------- Sampling -------------------- """

            logger.log("Obtaining samples...")
            time_env_sampling_start = time.time()
            paths = self.sampler.obtain_samples(log=True, log_prefix='train-')
            sampling_time = time.time() - time_env_sampling_start

            """ ----------------- Processing Samples ----------------- """

            # check how the samples are processed
            logger.log("Processing samples...")
            time_proc_samples_start = time.time()
            samples_data = self.sample_processor.process_samples(
                paths, log='all', log_prefix='train-')[0]
            self.replay_buffer.add_samples(
                samples_data['observations'],
                samples_data['actions'],
                samples_data['rewards'],
                samples_data['dones'],
                samples_data['next_observations'],
            )
            proc_samples_time = time.time() - time_proc_samples_start

            paths = self.sampler.obtain_samples(log=True, log_prefix='eval-', deterministic=True)
            _ = self.sample_processor.process_samples(paths, log='all', log_prefix='eval-')[0]

            # self.log_diagnostics(paths, prefix='train-')

            """ ------------------ Policy Update ------------------ """

            logger.log("Optimizing policy...")
            # This needs to take all samples_data so that it can construct graph for meta-optimization.
            time_optimization_step_start = time.time()
            self.algo.optimize_policy(self.replay_buffer,
                                      itr * self.epoch_length,
                                      self.num_grad_steps)

            """ ------------------- Logging Stuff ------------------- """

            logger.logkv('Itr', itr)
            logger.logkv('n_timesteps', self.sampler.total_timesteps_sampled)
            logger.logkv('Time-Optimization', time.time() - time_optimization_step_start)
            logger.logkv('Time-SampleProc', np.sum(proc_samples_time))
            logger.logkv('Time-Sampling', sampling_time)
            logger.logkv('Time', time.time() - start_time)
            logger.logkv('ItrTime', time.time() - itr_start_time)
            logger.dumpkvs()

            if itr == 0:
                sess.graph.finalize()

        logger.log("Training finished")
        self.sess.close()
def fit(self, obs, act, obs_next, epochs=1000, compute_normalization=True,
        valid_split_ratio=None, rolling_average_persitency=None, verbose=False,
        log_tabular=False, prefix=''):
    """
    Fits the NN dynamics model

    :param obs: observations - numpy array of shape (n_samples, ndim_obs)
    :param act: actions - numpy array of shape (n_samples, ndim_act)
    :param obs_next: observations after taking action - numpy array of shape (n_samples, ndim_obs)
    :param epochs: number of training epochs
    :param compute_normalization: boolean indicating whether normalization shall be (re-)computed given the data
    :param valid_split_ratio: relative size of validation split (float between 0.0 and 1.0)
    :param log_tabular: (boolean) whether to log training stats in tabular format
    :param verbose: logging verbosity
    """
    if obs is not None:
        self.update_buffer(obs, act, obs_next, valid_split_ratio, compute_normalization)

    if rolling_average_persitency is None:
        rolling_average_persitency = self.rolling_average_persitency

    sess = tf.get_default_session()

    if compute_normalization and self.normalize_input:
        self.compute_normalization(self._dataset_train['obs'],
                                   self._dataset_train['act'],
                                   self._dataset_train['delta'])

    if self.normalize_input:
        # normalize data
        obs_train, act_train, delta_train = self._normalize_data(
            self._dataset_train['obs'], self._dataset_train['act'],
            self._dataset_train['delta'])
    else:
        obs_train, act_train, delta_train = self._dataset_train['obs'], self._dataset_train['act'], \
                                            self._dataset_train['delta']

    valid_loss_rolling_average = None
    train_op_to_do = self.train_op_model_batches
    idx_to_remove = []
    epoch_times = []
    epochs_per_model = []

    """ ------- Looping over training epochs ------- """
    for epoch in range(epochs):

        # initialize data queue
        feed_dict = dict(
            list(zip(self.obs_batches_dataset_ph, obs_train)) +
            list(zip(self.act_batches_dataset_ph, act_train)) +
            list(zip(self.delta_batches_dataset_ph, delta_train)))
        sess.run(self.iterator.initializer, feed_dict=feed_dict)

        # preparations for recording training stats
        epoch_start_time = time.time()
        batch_losses = []

        """ ------- Looping through the shuffled and batched dataset for one epoch ------- """
        while True:
            try:
                obs_act_delta = sess.run(self.next_batch)
                obs_batch_stack = np.concatenate(obs_act_delta[:self.num_models], axis=0)
                act_batch_stack = np.concatenate(obs_act_delta[self.num_models:2 * self.num_models], axis=0)
                delta_batch_stack = np.concatenate(obs_act_delta[2 * self.num_models:], axis=0)

                # run train op
                batch_loss_train_ops = sess.run(
                    self.loss_model_batches + train_op_to_do,
                    feed_dict={self.obs_model_batches_stack_ph: obs_batch_stack,
                               self.act_model_batches_stack_ph: act_batch_stack,
                               self.delta_model_batches_stack_ph: delta_batch_stack})

                batch_loss = np.array(batch_loss_train_ops[:self.num_models])
                batch_losses.append(batch_loss)

            except tf.errors.OutOfRangeError:
                if self.normalize_input:
                    # normalize data
                    obs_test, act_test, delta_test = self._normalize_data(
                        self._dataset_test['obs'], self._dataset_test['act'],
                        self._dataset_test['delta'])
                else:
                    obs_test, act_test, delta_test = self._dataset_test['obs'], self._dataset_test['act'], \
                                                     self._dataset_test['delta']

                obs_test_stack = np.concatenate(obs_test, axis=0)
                act_test_stack = np.concatenate(act_test, axis=0)
                delta_test_stack = np.concatenate(delta_test, axis=0)

                # compute validation loss
                valid_loss = sess.run(
                    self.loss_model_batches,
                    feed_dict={self.obs_model_batches_stack_ph: obs_test_stack,
                               self.act_model_batches_stack_ph: act_test_stack,
                               self.delta_model_batches_stack_ph: delta_test_stack})
                valid_loss = np.array(valid_loss)

                if valid_loss_rolling_average is None:
                    # set initial rolling average to a higher value to avoid stopping too early
                    valid_loss_rolling_average = 1.5 * valid_loss
                    valid_loss_rolling_average_prev = 2.0 * valid_loss
                    for i in range(len(valid_loss)):
                        if valid_loss[i] < 0:
                            # for negative losses, scaling towards zero gives the higher initial value
                            valid_loss_rolling_average[i] = valid_loss[i] / 1.5
                            valid_loss_rolling_average_prev[i] = valid_loss[i] / 2.0

                valid_loss_rolling_average = rolling_average_persitency * valid_loss_rolling_average \
                                             + (1.0 - rolling_average_persitency) * valid_loss

                if verbose:
                    str_mean_batch_losses = ' '.join(['%.4f' % x for x in np.mean(batch_losses, axis=0)])
                    str_valid_loss = ' '.join(['%.4f' % x for x in valid_loss])
                    str_valid_loss_rolling_averge = ' '.join(['%.4f' % x for x in valid_loss_rolling_average])
                    logger.log(
                        "Training NNDynamicsModel - finished epoch %i --\n"
                        "train loss: %s\nvalid loss: %s\nvalid_loss_mov_avg: %s"
                        % (epoch, str_mean_batch_losses, str_valid_loss, str_valid_loss_rolling_averge))
                break

        for i in range(self.num_models):
            if (valid_loss_rolling_average_prev[i] < valid_loss_rolling_average[i]
                    or epoch == epochs - 1) and i not in idx_to_remove:
                idx_to_remove.append(i)
                epochs_per_model.append(epoch)
                if epoch < epochs - 1:
                    logger.log(
                        'At Epoch {}, stop training model {} since its valid_loss_rolling_average increased'
                        .format(epoch, i))

        train_op_to_do = [op for idx, op in enumerate(self.train_op_model_batches)
                          if idx not in idx_to_remove]

        if not idx_to_remove:
            # only track epoch times while all models are trained
            epoch_times.append(time.time() - epoch_start_time)

        if not train_op_to_do:
            if verbose and epoch < epochs - 1:
                logger.log('Stopping all DynamicsEnsemble training before reaching max_num_epochs')
            break

        valid_loss_rolling_average_prev = valid_loss_rolling_average

    """ ------- Tabular Logging ------- """
    if log_tabular:
        logger.logkv(prefix + 'AvgModelEpochTime', np.mean(epoch_times))
        assert len(epochs_per_model) == self.num_models
        logger.logkv(prefix + 'AvgEpochs', np.mean(epochs_per_model))
        logger.logkv(prefix + 'StdEpochs', np.std(epochs_per_model))
        logger.logkv(prefix + 'MaxEpochs', np.max(epochs_per_model))
        logger.logkv(prefix + 'MinEpochs', np.min(epochs_per_model))
        logger.logkv(prefix + 'AvgFinalTrainLoss', np.mean(batch_losses))
        logger.logkv(prefix + 'AvgFinalValidLoss', np.mean(valid_loss))
        logger.logkv(prefix + 'AvgFinalValidLossRoll', np.mean(valid_loss_rolling_average))
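
# Example (illustrative sketch): fitting the dynamics ensemble on freshly
# collected transitions with per-model early stopping. `dynamics_ensemble` is a
# placeholder for an already-constructed instance of this class; the arrays
# follow the shapes in the docstring above.
def _example_fit_ensemble(dynamics_ensemble, obs, act, obs_next):
    dynamics_ensemble.fit(obs, act, obs_next,
                          epochs=50,
                          compute_normalization=True,
                          valid_split_ratio=0.2,
                          rolling_average_persitency=0.99,
                          verbose=True,
                          log_tabular=True,
                          prefix='Model-')
    logger.dumpkvs()  # flush the tabular stats recorded with log_tabular=True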
def fit_one_epoch(self, remaining_model_idx, valid_loss_rolling_average_prev,
                  with_new_data, compute_normalization=True,
                  rolling_average_persitency=None, verbose=False,
                  log_tabular=False, prefix=''):

    if rolling_average_persitency is None:
        rolling_average_persitency = self.rolling_average_persitency
    sess = tf.get_default_session()

    if with_new_data:
        if compute_normalization and self.normalize_input:
            self.compute_normalization(self._dataset_train['obs'],
                                       self._dataset_train['act'],
                                       self._dataset_train['delta'])
        self.used_timesteps_counter += len(self._dataset_train['obs'][0])

    if self.normalize_input:
        # normalize data
        obs_train, act_train, delta_train = self._normalize_data(
            self._dataset_train['obs'], self._dataset_train['act'],
            self._dataset_train['delta'])
    else:
        obs_train, act_train, delta_train = self._dataset_train['obs'], self._dataset_train['act'], \
                                            self._dataset_train['delta']

    valid_loss_rolling_average = valid_loss_rolling_average_prev

    assert remaining_model_idx is not None
    train_op_to_do = [op for idx, op in enumerate(self.train_op_model_batches)
                      if idx in remaining_model_idx]

    # initialize data queue
    feed_dict = dict(
        list(zip(self.obs_batches_dataset_ph, obs_train)) +
        list(zip(self.act_batches_dataset_ph, act_train)) +
        list(zip(self.delta_batches_dataset_ph, delta_train)))
    sess.run(self.iterator.initializer, feed_dict=feed_dict)

    # preparations for recording training stats
    batch_losses = []

    """ ------- Looping through the shuffled and batched dataset for one epoch ------- """
    while True:
        try:
            obs_act_delta = sess.run(self.next_batch)
            obs_batch_stack = np.concatenate(obs_act_delta[:self.num_models], axis=0)
            act_batch_stack = np.concatenate(obs_act_delta[self.num_models:2 * self.num_models], axis=0)
            delta_batch_stack = np.concatenate(obs_act_delta[2 * self.num_models:], axis=0)

            # run train op
            batch_loss_train_ops = sess.run(
                self.loss_model_batches + train_op_to_do,
                feed_dict={self.obs_model_batches_stack_ph: obs_batch_stack,
                           self.act_model_batches_stack_ph: act_batch_stack,
                           self.delta_model_batches_stack_ph: delta_batch_stack})

            batch_loss = np.array(batch_loss_train_ops[:self.num_models])
            batch_losses.append(batch_loss)

        except tf.errors.OutOfRangeError:
            if self.normalize_input:
                # TODO: if not with_new_data, don't recompute
                # normalize data
                obs_test, act_test, delta_test = self._normalize_data(
                    self._dataset_test['obs'], self._dataset_test['act'],
                    self._dataset_test['delta'])
            else:
                obs_test, act_test, delta_test = self._dataset_test['obs'], self._dataset_test['act'], \
                                                 self._dataset_test['delta']

            obs_test_stack = np.concatenate(obs_test, axis=0)
            act_test_stack = np.concatenate(act_test, axis=0)
            delta_test_stack = np.concatenate(delta_test, axis=0)

            # compute validation loss
            valid_loss = sess.run(
                self.loss_model_batches,
                feed_dict={self.obs_model_batches_stack_ph: obs_test_stack,
                           self.act_model_batches_stack_ph: act_test_stack,
                           self.delta_model_batches_stack_ph: delta_test_stack})
            valid_loss = np.array(valid_loss)

            if valid_loss_rolling_average is None:
                # set initial rolling average to a higher value to avoid stopping too early
                valid_loss_rolling_average = 1.5 * valid_loss
                valid_loss_rolling_average_prev = 2.0 * valid_loss
                for i in range(len(valid_loss)):
                    if valid_loss[i] < 0:
                        # for negative losses, scaling towards zero gives the higher initial value
                        valid_loss_rolling_average[i] = valid_loss[i] / 1.5
                        valid_loss_rolling_average_prev[i] = valid_loss[i] / 2.0

            valid_loss_rolling_average = rolling_average_persitency * valid_loss_rolling_average \
                                         + (1.0 - rolling_average_persitency) * valid_loss

            if verbose:
                str_mean_batch_losses = ' '.join(['%.4f' % x for x in np.mean(batch_losses, axis=0)])
                str_valid_loss = ' '.join(['%.4f' % x for x in valid_loss])
                str_valid_loss_rolling_averge = ' '.join(['%.4f' % x for x in valid_loss_rolling_average])
                logger.log(
                    "Training NNDynamicsModel - finished one epoch\n"
                    "train loss: %s\nvalid loss: %s\nvalid_loss_mov_avg: %s"
                    % (str_mean_batch_losses, str_valid_loss, str_valid_loss_rolling_averge))
            break

    # iterate over a copy since models are removed from remaining_model_idx inside the loop
    for i in list(remaining_model_idx):
        if valid_loss_rolling_average_prev[i] < valid_loss_rolling_average[i]:
            remaining_model_idx.remove(i)
            logger.log('Stop training model {} since its valid_loss_rolling_average increased'.format(i))

    """ ------- Tabular Logging ------- """
    if log_tabular:
        logger.logkv(prefix + 'TimeStepsCtr', self.timesteps_counter)
        logger.logkv(prefix + 'UsedTimeStepsCtr', self.used_timesteps_counter)
        logger.logkv(prefix + 'AvgSampleUsage', self.used_timesteps_counter / self.timesteps_counter)
        logger.logkv(prefix + 'NumModelRemaining', len(remaining_model_idx))
        logger.logkv(prefix + 'AvgTrainLoss', np.mean(batch_losses))
        logger.logkv(prefix + 'AvgValidLoss', np.mean(valid_loss))
        logger.logkv(prefix + 'AvgValidLossRoll', np.mean(valid_loss_rolling_average))

    return remaining_model_idx, valid_loss_rolling_average
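
# Example (illustrative sketch): driving fit_one_epoch from an outer loop,
# stopping once every model in the ensemble has early-stopped.
# `dynamics_ensemble` is a placeholder instance of this class.
def _example_fit_one_epoch_loop(dynamics_ensemble, obs, act, obs_next, max_epochs=50):
    dynamics_ensemble.update_buffer(obs, act, obs_next)
    remaining_model_idx = list(range(dynamics_ensemble.num_models))
    valid_loss_rolling_average = None
    for epoch in range(max_epochs):
        remaining_model_idx, valid_loss_rolling_average = dynamics_ensemble.fit_one_epoch(
            remaining_model_idx,
            valid_loss_rolling_average,
            with_new_data=(epoch == 0),
            verbose=True)
        if not remaining_model_idx:
            break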
def update_buffer(self, obs, act, obs_next, valid_split_ratio=None, check_init=True):

    assert obs.ndim == 2 and obs.shape[1] == self.obs_space_dims
    assert obs_next.ndim == 2 and obs_next.shape[1] == self.obs_space_dims
    assert act.ndim == 2 and act.shape[1] == self.action_space_dims

    self.timesteps_counter += obs.shape[0]

    if valid_split_ratio is None:
        valid_split_ratio = self.valid_split_ratio
    assert 1 > valid_split_ratio >= 0

    # split into train and validation set (one independent split per model)
    obs_train_batches = []
    act_train_batches = []
    delta_train_batches = []
    obs_test_batches = []
    act_test_batches = []
    delta_test_batches = []

    delta = obs_next - obs
    for i in range(self.num_models):
        obs_train, act_train, delta_train, obs_test, act_test, delta_test = train_test_split(
            obs, act, delta, test_split_ratio=valid_split_ratio)
        obs_train_batches.append(obs_train)
        act_train_batches.append(act_train)
        delta_train_batches.append(delta_train)
        obs_test_batches.append(obs_test)
        act_test_batches.append(act_test)
        delta_test_batches.append(delta_test)

    # create data queue
    # If case should be entered exactly once
    if check_init and self._dataset_test is None:
        self._dataset_test = dict(obs=obs_test_batches, act=act_test_batches, delta=delta_test_batches)
        self._dataset_train = dict(obs=obs_train_batches, act=act_train_batches, delta=delta_train_batches)

        assert self.next_batch is None
        self.next_batch, self.iterator = self._data_input_fn(
            self._dataset_train['obs'], self._dataset_train['act'],
            self._dataset_train['delta'], batch_size=self.batch_size)

        assert self.normalization is None
        if self.normalize_input:
            self.compute_normalization(self._dataset_train['obs'],
                                       self._dataset_train['act'],
                                       self._dataset_train['delta'])
    else:
        n_test_new_samples = len(obs_test_batches[0])
        n_max_test = self.buffer_size_test - n_test_new_samples
        n_train_new_samples = len(obs_train_batches[0])
        n_max_train = self.buffer_size_train - n_train_new_samples

        for i in range(self.num_models):
            self._dataset_test['obs'][i] = np.concatenate(
                [self._dataset_test['obs'][i][-n_max_test:], obs_test_batches[i]])
            self._dataset_test['act'][i] = np.concatenate(
                [self._dataset_test['act'][i][-n_max_test:], act_test_batches[i]])
            self._dataset_test['delta'][i] = np.concatenate(
                [self._dataset_test['delta'][i][-n_max_test:], delta_test_batches[i]])

            self._dataset_train['obs'][i] = np.concatenate(
                [self._dataset_train['obs'][i][-n_max_train:], obs_train_batches[i]])
            self._dataset_train['act'][i] = np.concatenate(
                [self._dataset_train['act'][i][-n_max_train:], act_train_batches[i]])
            self._dataset_train['delta'][i] = np.concatenate(
                [self._dataset_train['delta'][i][-n_max_train:], delta_train_batches[i]])

    logger.log('Model has dataset_train, dataset_test with size {}, {}'.format(
        len(self._dataset_train['obs'][0]), len(self._dataset_test['obs'][0])))