def _default_hparams(self):
    # Data Dimensions
    default_dict = AttrDict({
        'batch_size': -1,
        'max_seq_len': -1,
        'n_actions': -1,
        'state_dim': -1,
        'input_nc': 3,  # number of input feature maps
        'device': None,
        'data_conf': None,
        'img_sz': None,
        'goal_cond': True
    })
    # Network params
    default_dict.update({
        'use_convs': True,
        'use_batchnorm': True,  # TODO deprecate
        'normalization': 'batch',
    })
    # add new params to parent params
    parent_params = HParams()
    for k in default_dict.keys():
        parent_params.add_hparam(k, default_dict[k])
    return parent_params
def loss(self, model_output):
    losses = AttrDict()
    for i_cl, cl in enumerate(self.tdist_classifiers):
        setattr(losses, 'tdist{}'.format(cl.tdist), cl.loss(model_output[i_cl]))
    # compute total loss
    losses.total_loss = torch.stack(list(losses.values())).sum()
    return losses
def loss(self, model_output):
    losses = AttrDict()
    setattr(losses, 'mse',
            torch.nn.MSELoss()(model_output.action.squeeze(),
                               self.labels.to(self._hp.device)))
    # compute total loss
    losses.total_loss = torch.stack(list(losses.values())).sum()
    return losses
def loss(self, model_output):
    losses = AttrDict()
    setattr(losses, 'cross_entropy',
            torch.nn.CrossEntropyLoss()(model_output.logits,
                                        self.labels.to(self._hp.device)))
    # compute total loss
    losses.total_loss = torch.stack(list(losses.values())).sum()
    return losses
def _default_hparams(self):
    default_dict = AttrDict({
        'num_bins': 10,
    })
    # add new params to parent params
    parent_params = super()._default_hparams()
    for k in default_dict.keys():
        parent_params.add_hparam(k, default_dict[k])
    return parent_params
def _default_hparams(self):
    default_dict = AttrDict({
        'ndist_max': 10,  # maximum temporal distance to classify
        'use_skips': False,  # TODO: try resnet architecture!
        'ngf': 8,
        'nz_enc': 64,
        'classifier_restore_path': None  # not really needed here
    })
    # add new params to parent params
    parent_params = super()._default_hparams()
    for k in default_dict.keys():
        parent_params.add_hparam(k, default_dict[k])
    return parent_params
def _default_hparams(self):
    default_dict = AttrDict({
        'tmax_label': 10,  # highest temporal-distance label; larger values are clamped to it
        'use_skips': False,  # TODO: try resnet architecture!
        'ngf': 8,
        'nz_enc': 64,
        'classifier_restore_path': None  # not really needed here
    })
    # add new params to parent params
    parent_params = super()._default_hparams()
    for k in default_dict.keys():
        parent_params.add_hparam(k, default_dict[k])
    return parent_params
def __getitem__(self, index):
    file_index = index // self.traj_per_file
    path = self.filenames[file_index]
    start_ind_str, _ = path.split('/')[-1][:-3].split('to')
    start_ind = self._get_num_from_str(start_ind_str)
    with h5py.File(path, 'r') as F:
        ex_index = index % self.traj_per_file  # get the index
        key = 'traj{}'.format(ex_index)
        traj_ind = start_ind + ex_index
        # Fetch data into a dict ([()] reads the full dataset; .value was removed in h5py >= 3.0)
        data_dict = AttrDict(images=F[key + '/images'][()])
        for name in F[key].keys():
            if name in ['states', 'actions', 'pad_mask']:
                data_dict[name] = F[key + '/' + name][()].astype(np.float32)
    data_dict = self.process_data_dict(data_dict)
    if self._data_conf.sel_len != -1:
        data_dict = self.sample_rand_shifts(data_dict)
    data_dict['index'] = index
    return data_dict
def val(self, test_control=True):
    print('Running Testing')
    if self.args.test_prediction:
        start = time.time()
        self.model_val.load_state_dict(self.model.state_dict())
        if self._hp.model_test is not None:
            self.model_test.load_state_dict(self.model.state_dict())
        losses_meter = RecursiveAverageMeter()
        with autograd.no_grad():
            for batch_idx, sample_batched in enumerate(self.val_loader):
                inputs = AttrDict(map_dict(lambda x: x.to(self.device), sample_batched))
                output = self.model_val(inputs)
                losses = self.model_val.loss(output)
                if self._hp.model_test is not None:
                    run_through_traj(self.model_test, inputs)
                losses_meter.update(losses)
                del losses
            if self.run_testmetrics:
                print("Finished Evaluation! Exiting...")
                exit(0)
            self.model_val.log_outputs(output, inputs, losses_meter.avg,
                                       self.global_step, log_images=True, phase='val')
            print('\nTest set: Average loss: {:.4f} in {:.2f}s\n'
                  .format(losses_meter.avg.total_loss.item(), time.time() - start))
        del output
def _default_hparams(self):
    default_dict = AttrDict({
        'use_skips': False,  # TODO: try resnet architecture!
        'ngf': 8,
        'action_size': 2,
        'nz_enc': 64,
        'classifier_restore_path': None,  # not really needed here
        'low_dim': False,
        'gamma': 0.0
    })
    # add new params to parent params
    parent_params = super()._default_hparams()
    for k in default_dict.keys():
        parent_params.add_hparam(k, default_dict[k])
    return parent_params
def make_prediction(self, image_pairs_stacked):
    im_t0, im_t1 = image_pairs_stacked[:, 0], image_pairs_stacked[:, 1]
    embeddings = self.encoder(torch.cat([im_t0, im_t1], dim=1))
    embeddings = self.spatial_softmax(embeddings)
    self.tdist_estimates = self.linear(embeddings)
    model_output = AttrDict(tdist_estimates=self.tdist_estimates,
                            img_pair=image_pairs_stacked)
    return model_output
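# The spatial-softmax step above collapses each feature map to the expected
# (x, y) position of its activation mass, yielding a compact keypoint vector.
# A minimal sketch of such a layer, assuming (B, C, H, W) inputs -- the repo's
# own implementation may differ in detail:
import torch
import torch.nn.functional as F

def spatial_softmax(features):
    b, c, h, w = features.shape
    attn = F.softmax(features.view(b, c, h * w), dim=-1).view(b, c, h, w)
    xs = torch.linspace(-1, 1, w, device=features.device)
    ys = torch.linspace(-1, 1, h, device=features.device)
    ex = (attn.sum(dim=2) * xs).sum(-1)  # expected x coordinate, (B, C)
    ey = (attn.sum(dim=3) * ys).sum(-1)  # expected y coordinate, (B, C)
    return torch.cat([ex, ey], dim=1)    # (B, 2C) keypoint features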
def _default_hparams(self):
    default_dict = AttrDict({
        'use_skips': False,
        'ngf': 8,
        'action_size': 2,
        'state_size': 30,
        'nz_enc': 64,
        'linear_layer_size': 128,
        'classifier_restore_path': None,  # not really needed here
        'low_dim': False,
        'gamma': 0.0,
        'terminal': True,
        'update_target_rate': 1,
        'action_range': [-1.0, 1.0],
        'action_stds': [0.6, 0.6, 0.3, 0.3],
        'est_max_samples': 100,
        'binary_reward': [0, 1],
        'n_step': 1,
        'min_q': False,
        'min_q_weight': 1.0,
        'min_q_lagrange': False,
        'min_q_eps': 0.1,
        'sigmoid': False,
        'optimize_actions': 'random_shooting',
        'target_network_update': 'replace',
        'polyak': 0.995,
        'sg_sample': 'half_unif_half_first',
        'geom_sample_p': 0.5,
        'bellman_weight': 1.0,
        'td_loss': 'mse',
        'add_negative_sample': False,
        'negative_sample_type': 'copy_arm',  # also rand_arm, batch_goal
        'gaussian_blur': False,
        'twin_critics': False,
        'add_action_noise': False,
        'action_scaling': 1.0,
        'eval_target_nets': True,
    })
    # add new params to parent params
    parent_params = super()._default_hparams()
    for k in default_dict.keys():
        parent_params.add_hparam(k, default_dict[k])
    return parent_params
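# Hedged sketch of what the 'target_network_update' / 'polyak' hparams above
# imply: either hard-replace the target network or Polyak-average it toward
# the online network. Attribute and function names here are illustrative:
import torch

def update_target_network(qnetwork, target_qnetwork, mode='replace', polyak=0.995):
    if mode == 'replace':
        target_qnetwork.load_state_dict(qnetwork.state_dict())
    else:  # Polyak averaging: slow exponential moving average of online weights
        with torch.no_grad():
            for p, p_targ in zip(qnetwork.parameters(), target_qnetwork.parameters()):
                p_targ.mul_(polyak).add_((1 - polyak) * p)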
def loss(self, model_output):
    if self._hp.low_dim:
        image_pairs = self.images[:, 2:]
    else:
        image_pairs = self.images[:, 3:]
    ## Get max_a Q(s_t+1) (is a min since lower is better)
    qs = []
    for ns in range(100):
        actions = torch.FloatTensor(model_output.size(0), self._hp.action_size).uniform_(-1, 1).cuda()
        targetq = self.target_qnetwork(image_pairs, actions)
        qs.append(targetq)
    qs = torch.stack(qs)
    qval = torch.sum((1 + torch.arange(qs.shape[2])[None]).float().to(self._hp.device) * qs, 2)
    ## Select corresponding target Q distribution
    ids = qval.min(0)[1]
    newqs = []
    for k in range(self._hp.batch_size * 2):
        newqs.append(qs[ids[k], k])
    qs = torch.stack(newqs)
    ## Shift Q*(s_t+1) to get Q*(s_t)
    shifted = torch.zeros(qs.size()).to(self._hp.device)
    shifted[:, 1:] = qs[:, :-1]
    shifted[:, -1] += qs[:, -1]
    lb = self.labels.to(self._hp.device).unsqueeze(-1)
    isg = torch.zeros((self._hp.batch_size * 2, 10)).to(self._hp.device)
    isg[:, 0] = 1
    ## If next state is goal then target should be 0, else should be shifted
    losses = AttrDict()
    target = (lb * isg) + ((1 - lb) * shifted)
    ## KL between target and output
    log_q = self.out_softmax.clamp(1e-5, 1 - 1e-5).log()
    log_t = target.clamp(1e-5, 1 - 1e-5).log()
    losses.total_loss = (target * (log_t - log_q)).sum(1).mean()
    self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())
    return losses
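# Worked mini-example of the shift above: the target categorical distribution
# over temporal-distance bins moves one bin toward larger distance (one more
# step from the goal), with mass accumulating in the last bin. With 4 bins:
import torch
qs = torch.tensor([[0.7, 0.2, 0.1, 0.0]])
shifted = torch.zeros_like(qs)
shifted[:, 1:] = qs[:, :-1]
shifted[:, -1] += qs[:, -1]
print(shifted)  # tensor([[0.0000, 0.7000, 0.2000, 0.1000]])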
def loss(self, model_output):
    # BCE = F.binary_cross_entropy(self.rec.view(-1, 3, 64, 64), self.images.view(-1, 3, 64, 64), size_average=False)
    # BCE = F.mse_loss(self.rec.view(-1, 3, 64, 64), ((self.images.view(-1, 3, 64, 64) + 1) / 2.0), size_average=False)
    BCE = ((self.rec - ((self.images + 1) / 2.0)) ** 2).mean()
    # debug visualization: dump reconstruction / ground-truth pairs to disk
    for i in range(10):
        rec = self.rec[i, 0].permute(1, 2, 0).cpu().detach().numpy() * 255.0
        im = ((self.images + 1) / 2.0)[i, 0].permute(1, 2, 0).cpu().detach().numpy() * 255.0
        ex = np.concatenate([rec, im], 0)
        cv2.imwrite("ex" + str(i) + ".png", ex)
    # print(BCE)
    KLD = -0.5 * torch.mean(1 + self.logvar - self.mu.pow(2) - self.logvar.exp())
    # print(KLD)
    losses = AttrDict()
    losses.total_loss = BCE + 0.00001 * KLD
    return losses
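# Note on the terms above: despite its name, BCE is a pixel-wise MSE between the
# reconstruction and the images rescaled from [-1, 1] to [0, 1], while KLD is the
# closed-form KL from the diagonal Gaussian posterior N(mu, exp(logvar)) to a
# standard normal prior; the 1e-5 weight makes this a very-low-beta VAE loss.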
def make_prediction(self, image_pairs_stacked):
    im_t0, im_t1 = image_pairs_stacked[:, 0], image_pairs_stacked[:, 1]
    embeddings = self.encoder(torch.cat([im_t0, im_t1], dim=1))
    embeddings = self.spatial_softmax(embeddings)
    logits = self.linear(embeddings)
    self.out_softmax = torch.softmax(logits, dim=1)
    model_output = AttrDict(logits=logits, out_softmax=self.out_softmax,
                            img_pair=image_pairs_stacked)
    return model_output
def loss(self, model_output):
    if self._hp.low_dim:
        image_pairs = self.images[:, 2:]
    else:
        image_pairs = self.images[:, 3:]
    qs = []
    for ns in range(100):
        actions = torch.FloatTensor(model_output.size(0), self._hp.action_size).uniform_(-1, 1).cuda()
        targetq = self.target_qnetwork(image_pairs, actions)
        qs.append(targetq)
    qs = torch.stack(qs)
    lb = self.labels.to(self._hp.device)
    losses = AttrDict()
    target = lb + self._hp.gamma * torch.max(qs, 0)[0].squeeze()
    losses.total_loss = F.mse_loss(target, model_output.squeeze())
    self.target_qnetwork.load_state_dict(self.qnetwork.state_dict())
    return losses
def make_prediction(self, image_pairs_stacked):
    im_t0, im_t1 = image_pairs_stacked[:, 0], image_pairs_stacked[:, 1]
    embeddings = self.encoder(torch.cat([im_t0, im_t1], dim=1))
    if self._hp.spatial_softmax:
        embeddings = self.spatial_softmax(embeddings)
    else:
        embeddings = torch.flatten(embeddings, start_dim=1)
    for fc_layer in self.fc_layers:
        embeddings = F.relu(fc_layer(embeddings))
    self.tdist_estimates = self.linear(embeddings)
    model_output = AttrDict(tdist_estimates=self.tdist_estimates,
                            img_pair=image_pairs_stacked)
    return model_output
def forward(self, inputs):
    """
    forward pass at training time
    :param inputs: dict with 'current_img' and 'goal_img', each of shape batch x channel x height x width
    :return: model_output
    """
    image_pairs = torch.cat([inputs['current_img'], inputs['goal_img']], dim=1)
    embeddings = self.encoder(image_pairs)
    embeddings = self.spatial_softmax(embeddings)
    fraction = torch.sigmoid(self.linear(embeddings))
    model_output = AttrDict(fraction=fraction)
    return model_output
def get_configs(self):
    self.args = args = self.get_trainer_args()
    exp_dir = get_exp_dir()
    # conf_path = get_config_path(args.path)
    # print('loading from the config file {}'.format(conf_path))
    conf_path = os.path.abspath(args.path)
    conf_module = imp.load_source('conf', args.path)
    conf = conf_module.configuration
    model_conf = conf_module.model_config
    try:
        data_conf = conf_module.data_config
    except AttributeError:
        data_conf_file = imp.load_source(
            'dataset_spec', os.path.join(AttrDict(conf).data_dir, 'dataset_spec.py'))
        data_conf = AttrDict()
        data_conf.dataset_spec = AttrDict(data_conf_file.dataset_spec)
    if args.gpu != -1:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)
    else:
        os.environ["CUDA_VISIBLE_DEVICES"] = str(0)
    return args, conf_module, conf, model_conf, data_conf, exp_dir, conf_path
def forward(self, inputs):
    """
    forward pass at training time
    :param inputs: dict with 'demo_seq_images' of shape batch x time x channel x height x width
    :return: model_output
    """
    tlen = inputs.demo_seq_images.shape[1]
    pos_pairs, neg_pairs = self.sample_image_pair(inputs.demo_seq_images, tlen, self.tdist)
    image_pairs = torch.cat([pos_pairs, neg_pairs], dim=0)
    embeddings = self.encoder(image_pairs)
    embeddings = self.spatial_softmax(embeddings)
    fraction = torch.sigmoid(self.linear(embeddings))
    model_output = AttrDict(fraction=fraction, pos_pair=self.pos_pair, neg_pair=self.neg_pair)
    return model_output
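# Hedged sketch of the pair sampling this forward pass relies on (the actual
# sample_image_pair is defined elsewhere in the repo): a positive pair lies at
# most `tdist` steps apart within a trajectory, a negative pair strictly
# further apart. All names below are illustrative:
import torch

def sample_image_pair(images, tlen, tdist):
    """images: (B, T, C, H, W) -> stacked positive and negative (t0, t1) pairs."""
    b = images.shape[0]
    t0 = torch.randint(0, tlen - tdist - 1, (b,))
    dt_pos = torch.randint(1, tdist + 1, (b,))  # within tdist -> positive
    dt_neg = torch.tensor([int(torch.randint(tdist + 1, tlen - int(s), (1,)))
                           for s in t0])        # beyond tdist -> negative
    idx = torch.arange(b)
    pos_pairs = torch.stack([images[idx, t0], images[idx, t0 + dt_pos]], dim=1)
    neg_pairs = torch.stack([images[idx, t0], images[idx, t0 + dt_neg]], dim=1)
    return pos_pairs, neg_pairs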
def train_epoch(self, epoch):
    self.model.train()
    epoch_len = len(self.train_loader)
    end = time.time()
    batch_time = AverageMeter()
    upto_log_time = AverageMeter()
    data_load_time = AverageMeter()
    self.log_outputs_interval = 10
    self.log_images_interval = int(epoch_len / self.args.imepoch)
    print('starting epoch ', epoch)
    for self.batch_idx, sample_batched in enumerate(self.train_loader):
        data_load_time.update(time.time() - end)
        inputs = AttrDict(map_dict(lambda x: x.to(self.device), sample_batched))
        self.optimizer.zero_grad()
        output = self.model(inputs)
        losses = self.model.loss(output)
        losses.total_loss.backward()
        self.optimizer.step()
        upto_log_time.update(time.time() - end)
        if self.log_outputs_now:
            self.model.log_outputs(output, inputs, losses, self.global_step,
                                   log_images=self.log_images_now, phase='train')
        batch_time.update(time.time() - end)
        end = time.time()
        if self.log_outputs_now:
            print('GPU {}: {}'.format(os.environ["CUDA_VISIBLE_DEVICES"] if self.use_cuda else 'none',
                                      self._hp.exp_path))
            print('itr: {} Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                self.global_step, epoch, self.batch_idx, len(self.train_loader),
                100. * self.batch_idx / len(self.train_loader), losses.total_loss.item()))
            print('avg time for loading: {:.2f}s, logs: {:.2f}s, compute: {:.2f}s, total: {:.2f}s'
                  .format(data_load_time.avg, batch_time.avg - upto_log_time.avg,
                          upto_log_time.avg - data_load_time.avg, batch_time.avg))
            togo_train_time = batch_time.avg * (self._hp.num_epochs - epoch) * epoch_len / 3600.
            print('ETA: {:.2f}h'.format(togo_train_time))
        del output, losses
        self.global_step = self.global_step + 1
def __getitem__(self, index):
    file_index = index // self.traj_per_file
    path = self.filenames[file_index]
    with h5py.File(path, 'r') as F:
        ex_index = index % self.traj_per_file  # get the index
        key = 'traj{}'.format(ex_index)
        # Fetch data into a dict ([()] reads the full dataset; .value was removed in h5py >= 3.0)
        data_dict = AttrDict(images=F[key + '/images'][()])
        for name in F[key].keys():
            if name in ['states', 'actions', 'pad_mask']:
                data_dict[name] = F[key + '/' + name][()].astype(np.float32)
    data_dict = self.process_data_dict(data_dict)
    if self._data_conf.sel_len != -1:
        data_dict = self.sample_rand_shifts(data_dict)
    return data_dict
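# Hedged sketch of the on-disk layout this reader expects: each HDF5 file holds
# self.traj_per_file groups named traj0, traj1, ..., each with an 'images'
# dataset plus optional 'states' / 'actions' / 'pad_mask' arrays. The shapes
# and filename below are illustrative:
import h5py
import numpy as np

with h5py.File('example.h5', 'w') as F:
    for i in range(2):
        g = F.create_group('traj{}'.format(i))
        g.create_dataset('images', data=np.zeros((31, 3, 64, 64), dtype=np.uint8))
        g.create_dataset('actions', data=np.zeros((31, 2), dtype=np.float32))
        g.create_dataset('pad_mask', data=np.ones((31,), dtype=np.float32))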
def get_random_observations(self):
    hp = AttrDict(img_sz=(64, 64), sel_len=-1, T=31)
    dataset = FixLenVideoDataset(self._hp.graph_dataset,
                                 self.learned_cost.model._hp,
                                 hp).get_data_loader(self._hp.dloader_bs)
    total_images = []
    dl = iter(dataset)
    for i in range(self._hp.graph_size // self._hp.dloader_bs):
        try:
            batch = next(dl)
        except StopIteration:
            dl = iter(dataset)
            batch = next(dl)
        images = batch['demo_seq_images']
        selected_images = images[torch.arange(len(images)),
                                 torch.randint(0, images.shape[1], (len(images),))]
        total_images.append(selected_images)
    total_images = torch.cat(total_images)
    return total_images
def val(self):
    print('Running Testing')
    if self.args.test_prediction:
        start = time.time()
        self.model_val.to(torch.device('cuda'))
        self.model_val.load_state_dict(self.model.state_dict())
        if self._hp.model_test is not None:
            self.model_test.load_state_dict(self.model.state_dict())
        losses_meter = RecursiveAverageMeter()
        with autograd.no_grad():
            for batch_idx, sample_batched in enumerate(self.val_loader):
                inputs = AttrDict(map_dict(lambda x: x.to(self.device), sample_batched))
                output = self.model_val(inputs)
                losses = self.model_val.loss(output)
                losses_meter.update(losses)
                del losses
            self.model_val.log_outputs(output, inputs, losses_meter.avg,
                                       self.global_step, log_images=True, phase='val')
            print('\nTest set: Average loss: {:.4f} in {:.2f}s\n'
                  .format(losses_meter.avg.total_loss.item(), time.time() - start))
        del output
        self.model_val.to(torch.device('cpu'))
def loss(self, model_output):
    if self._hp.low_dim:
        image_pairs = self.images[:, self._hp.state_size:]
    else:
        image_pairs = self.images[:, 3:]
    losses = AttrDict()
    if self._hp.min_q:
        # Implement min-q loss
        total_min_q_loss = []
        self.min_q_lse = 0
        for i, q_fn in enumerate(self.qnetworks):
            random_q_values = self.network_out_2_qval(
                self.compute_action_samples(self.get_sg_pair(self.images), q_fn,
                                            parallel=True, detach_grad=False))
            random_density = np.log(0.5 ** self._hp.action_size)  # log uniform density
            random_q_values -= random_density
            min_q_loss = torch.logsumexp(random_q_values, dim=0) - np.log(self._hp.est_max_samples)
            min_q_loss = min_q_loss.mean()
            self.min_q_lse += min_q_loss
            total_min_q_loss.append(min_q_loss - model_output[i].mean())
        total_min_q_loss = self.cql_sign * torch.stack(total_min_q_loss).mean()
        if self._hp.min_q_lagrange and hasattr(self, 'log_alpha'):
            min_q_weight = self.log_alpha.exp().squeeze()
            total_min_q_loss -= self._hp.min_q_eps
        else:
            min_q_weight = self._hp.min_q_weight
        losses.min_q_loss = min_q_weight * total_min_q_loss
        self.min_q_lagrange_loss = -1 * losses.min_q_loss
    losses.bellman_loss = self._hp.bellman_weight * self.get_td_error(image_pairs, model_output)
    losses.total_loss = torch.stack(list(losses.values())).sum()
    if 'min_q_loss' in losses:
        losses.min_q_loss /= min_q_weight  # divide this back out so we can compare log likelihoods
    return losses
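# Hedged sketch of the conservative penalty computed above (CQL-style):
# log-sum-exp of Q over uniformly sampled actions, importance-corrected by the
# uniform log-density, minus Q on dataset actions; this pushes down Q-values on
# out-of-distribution actions. Names and shapes below are illustrative:
import numpy as np
import torch

def cql_penalty(q_random, q_data, action_size, n_samples):
    """q_random: (n_samples, B) Q at uniform actions in [-1, 1]; q_data: (B,)."""
    log_uniform_density = np.log(0.5 ** action_size)
    lse = torch.logsumexp(q_random - log_uniform_density, dim=0) - np.log(n_samples)
    return (lse - q_data).mean()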
def make_prediction(self, images):
    self.action = self.actor_network(images)
    model_output = AttrDict(action=self.action)
    return model_output
import os

from classifier_control.classifier.utils.general_utils import AttrDict
from classifier_control.classifier.models.tempdist_regressor import TempdistRegressor, TempdistRegressorTestTime
from classifier_control.classifier.utils.logger import TdistRegressorLogger

current_dir = os.path.dirname(os.path.realpath(__file__))

configuration = {
    'model': TempdistRegressor,
    'model_test': TempdistRegressorTestTime,
    'logger': TdistRegressorLogger,
    'data_dir': os.environ['VMPC_DATA'] + '/classifier_control/data_collection/sim/tabletop-reacher',  # directory containing data
    'batch_size': 32,
    'num_epochs': 1000,
    'seed': 1,
}
configuration = AttrDict(configuration)

data_config = AttrDict(img_sz=(64, 64), sel_len=-1, T=31)

model_config = {}
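# Hedged usage sketch: configs like this are consumed via imp.load_source (see
# get_configs above), which reads the module-level configuration / data_config /
# model_config objects. The path below is illustrative:
import imp
conf_module = imp.load_source('conf', 'experiments/tdist_regressor/conf.py')
print(conf_module.configuration.batch_size)  # 32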
    elif len < target_length:
        raise ValueError("not enough length")
    else:
        return val

@staticmethod
def get_dataset_spec(data_dir):
    return imp.load_source('dataset_spec', os.path.join(data_dir, 'dataset_spec.py')).dataset_spec


if __name__ == '__main__':
    data_dir = os.environ['VMPC_DATA'] + '/classifier_control/data_collection/sim/1_obj_cartgripper_xz_rejsamp'
    hp = AttrDict(img_sz=(48, 64), sel_len=-1, T=31)
    loader = FixLenVideoDataset(data_dir, hp).get_data_loader(32)
    for i_batch, sample_batched in enumerate(loader):
        images = np.asarray(sample_batched['demo_seq_images'])
        pdb.set_trace()
        images = np.transpose((images + 1) / 2, [0, 1, 3, 4, 2])  # convert to channel-last for plotting
        actions = np.asarray(sample_batched['actions'])
        print('actions', actions)
        plt.imshow(np.asarray(images[0, 0]))
        plt.show()