def extract_supervised_data(self, demo_file, noisy=False):
    """ Load the states and actions of the demos into memory.
    Args:
        demo_file: list of demo files where each file contains expert's
            states and actions of one task.
    """
    # n_folders = len(os.listdir(self.demo_gif_dir))
    # gif_path = self.demo_gif_dir + self.gif_prefix + '_0/*.gif'
    # n_examples_per_folder = len(glob.glob(gif_path))
    demos = extract_demo_dict(demo_file)
    n_folders = len(demos.keys())
    N_demos = len(demos.keys())
    self.state_idx = range(demos[0]['demoX'].shape[-1])
    self._dU = demos[0]['demoU'].shape[-1]
    self._dT = demos[0]['target'].shape[-1]
    im_height = FLAGS.im_height
    im_width = FLAGS.im_width
    num_channels = FLAGS.num_channels
    self._dO = im_height * im_width * num_channels
    idx = np.arange(n_folders)
    if FLAGS.train:
        n_val = FLAGS.val_set_size  # number of demos for testing
        if not hasattr(self, 'train_idx'):
            if n_val != 0:
                if not FLAGS.shuffle_val:
                    self.val_idx = idx[-n_val:]
                    self.train_idx = idx[:-n_val]
                else:
                    self.val_idx = np.sort(
                        np.random.choice(idx, size=n_val, replace=False))
                    mask = np.array([(i in self.val_idx) for i in idx])
                    self.train_idx = np.sort(idx[~mask])
            else:
                self.train_idx = idx
                self.val_idx = []
        # Normalize the states if it's training.
        with Timer('Normalizing states'):
            if self.scale is None or self.bias is None:
                # Stack states from the training folders only; a list (not a
                # generator) so np.vstack sees a proper sequence.
                states = np.vstack(
                    [demos[i]['demoX'] for i in self.train_idx]
                )  # hardcoded here to solve the memory issue
                states = states.reshape(-1, len(self.state_idx))
                # 1e-3 to avoid infs if some state dimensions don't change in
                # the first batch of samples
                self.scale = np.diag(
                    1.0 / np.maximum(np.std(states, axis=0), 1e-3))
                self.bias = -np.mean(states.dot(self.scale), axis=0)
                # Save the scale and bias.
                with open('data/scale_and_bias_%s.pkl' % FLAGS.experiment,
                          'wb') as f:
                    pickle.dump({'scale': self.scale, 'bias': self.bias}, f)
            for key in demos.keys():
                demos[key]['demoX'] = demos[key]['demoX'].reshape(
                    -1, len(self.state_idx))
                demos[key]['demoX'] = demos[key]['demoX'].dot(
                    self.scale) + self.bias
                demos[key]['demoX'] = demos[key]['demoX'].reshape(
                    -1, self.T, len(self.state_idx))
    if not noisy:
        self.demos = demos
    else:
        self.noisy_demos = demos
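
# The scale/bias computed above is a per-dimension whitening transform:
# `scale` is a diagonal matrix of clamped inverse standard deviations, and
# `bias` shifts the scaled states to zero mean. A minimal standalone sketch
# of the same recipe (numpy only; the `states` array here is a hypothetical
# stand-in for the stacked training states, not data from the codebase):
import numpy as np

states = np.random.randn(1000, 20) * 5.0 + 3.0  # (num_samples, state_dim)

# Clamp tiny stds so near-constant dimensions don't produce infs.
scale = np.diag(1.0 / np.maximum(np.std(states, axis=0), 1e-3))
bias = -np.mean(states.dot(scale), axis=0)

normalized = states.dot(scale) + bias
print(np.allclose(normalized.mean(axis=0), 0.0))  # ~zero mean
print(np.allclose(normalized.std(axis=0), 1.0))   # ~unit std per dimension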
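
# The shuffle_val branch in the function above (and annotated below) picks
# n_val random validation folders, then takes the complement for training:
# `mask` marks validation positions and idx[~mask] keeps everything else.
# A tiny standalone illustration (numpy only; the sizes here are made up):
import numpy as np

idx = np.arange(10)
val_idx = np.sort(np.random.choice(idx, size=3, replace=False))
mask = np.array([(i in val_idx) for i in idx])
train_idx = np.sort(idx[~mask])

# Equivalent, and clearer for large index sets:
assert np.array_equal(train_idx, np.setdiff1d(idx, val_idx))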
def extract_supervised_data(self, demo_file, noisy=False):
    """ Load the states and actions of the demos into memory.
    Args:
        demo_file: list of demo files where each file contains expert's
            states and actions of one task.
    """
    # extract_demo_dict maps key -> data; each demo has the fields
    # u'xml', u'demoX', u'demoU'.
    # demoX: (24, 100, 20), demoU: (24, 100, 7), i.e. presumably
    # (num demos per task) x (time horizon) x (state/action dim),
    # with 100-timestep episodes.
    demos = extract_demo_dict(demo_file)
    # We don't need the whole dataset of simulated pushing.
    if FLAGS.experiment == 'sim_push':
        for key in demos.keys():
            demos[key]['demoX'] = demos[key]['demoX'][6:-6, :, :].copy()
            demos[key]['demoU'] = demos[key]['demoU'][6:-6, :, :].copy()
    # Each .pkl file is treated as one folder/task (e.g. 768 of them).
    n_folders = len(demos.keys())
    # Each row of demoX is one demo, so N_demos is the total demo count.
    N_demos = sum(demo['demoX'].shape[0] for demo in demos.values())
    # state_idx = [0, 1, ..., 19]: one index per state dimension.
    self.state_idx = range(demos[0]['demoX'].shape[-1])
    # Action dimension, e.g. self._dU = 7.
    self._dU = demos[0]['demoU'].shape[-1]
    print("Number of demos: %d" % N_demos)
    # One index per folder, i.e. per .pkl file of state/action info.
    idx = np.arange(n_folders)
    if FLAGS.train:
        n_val = FLAGS.val_set_size  # number of demos for testing
        if not hasattr(self, 'train_idx'):
            if n_val != 0:
                if not FLAGS.shuffle_val:
                    # Take the last n_val folders for validation...
                    self.val_idx = idx[-n_val:]
                    # ...and the rest for training; these are the numbers of
                    # the pickle files used for training.
                    self.train_idx = idx[:-n_val]
                else:
                    # Randomly choose the validation indices.
                    self.val_idx = np.sort(
                        np.random.choice(idx, size=n_val, replace=False))
                    mask = np.array([(i in self.val_idx) for i in idx])
                    # Keep the folder indices not chosen for validation.
                    self.train_idx = np.sort(idx[~mask])
            else:
                self.train_idx = idx
                self.val_idx = []
        # Normalize the states if it's training.
        with Timer('Normalizing states'):
            if self.scale is None or self.bias is None:
                # Stack all training states: (num_train_demos, 100, 20),
                # where each folder contributes 24 demos (12 after the
                # sim_push trim above).
                states = np.vstack(
                    [demos[i]['demoX'] for i in self.train_idx]
                )  # hardcoded here to solve the memory issue
                # Flatten to (num_samples, state_dim); the reshape is just
                # for computing the scale and bias.
                states = states.reshape(-1, len(self.state_idx))
                # 1e-3 to avoid infs if some state dimensions don't change in
                # the first batch of samples
                self.scale = np.diag(
                    1.0 / np.maximum(np.std(states, axis=0), 1e-3))
                self.bias = -np.mean(states.dot(self.scale), axis=0)
                # Save the scale and bias.
                with open('data/scale_and_bias_%s.pkl' % FLAGS.experiment,
                          'wb') as f:
                    pickle.dump({'scale': self.scale, 'bias': self.bias}, f)
            for key in demos.keys():
                demos[key]['demoX'] = demos[key]['demoX'].reshape(
                    -1, len(self.state_idx))
                demos[key]['demoX'] = demos[key]['demoX'].dot(
                    self.scale) + self.bias
                # Reshape back to (num_demos, T, state_dim).
                demos[key]['demoX'] = demos[key]['demoX'].reshape(
                    -1, self.T, len(self.state_idx))
    if not noisy:
        self.demos = demos
    else:
        self.noisy_demos = demos
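
# The normalization pickle is only written under FLAGS.train, so at test time
# self.scale / self.bias must come from that saved file. A minimal sketch of
# reloading it and normalizing new demos with the same flatten -> affine map
# -> reshape pipeline as the training loop above (assumes the 'sim_push'
# experiment name; the demoX array here is a hypothetical stand-in):
import pickle
import numpy as np

with open('data/scale_and_bias_sim_push.pkl', 'rb') as f:
    sb = pickle.load(f)
scale, bias = sb['scale'], sb['bias']

# Hypothetical unseen demos: (num_demos, T, state_dim), matching demoX above.
demoX = np.random.randn(12, 100, 20)
T, dX = demoX.shape[1], demoX.shape[2]

flat = demoX.reshape(-1, dX)      # flatten to (num_samples, state_dim)
flat = flat.dot(scale) + bias     # apply the stored whitening transform
demoX = flat.reshape(-1, T, dX)   # reshape back to (num_demos, T, state_dim)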