Example #1
    def extract_supervised_data(self, demo_file, noisy=False):
        """
            Load the states and actions of the demos into memory.
            Args:
                demo_file: list of demo files, where each file contains an expert's states and actions for one task.
        """
        
        demos = extract_demo_dict(demo_file)
        n_folders = len(demos)
        N_demos = len(demos)
        self.state_idx = range(demos[0]['demoX'].shape[-1])
        self._dU = demos[0]['demoU'].shape[-1]
        self._dT = demos[0]['target'].shape[-1]
        im_height = FLAGS.im_height
        im_width = FLAGS.im_width
        num_channels = FLAGS.num_channels
        self._dO = im_height*im_width*num_channels
        idx = np.arange(n_folders)
        if FLAGS.train:
            n_val = FLAGS.val_set_size # number of demo folders held out for validation
            if not hasattr(self, 'train_idx'):
                if n_val != 0:
                    if not FLAGS.shuffle_val:
                        self.val_idx = idx[-n_val:]
                        self.train_idx = idx[:-n_val]
                    else:
                        self.val_idx = np.sort(np.random.choice(idx, size=n_val, replace=False))
                        mask = np.array([(i in self.val_idx) for i in idx])
                        self.train_idx = np.sort(idx[~mask])
                else:
                    self.train_idx = idx
                    self.val_idx = []
            # Normalize the states if it's training.
            with Timer('Normalizing states'):
                if self.scale is None or self.bias is None:
                    # Stack only the training demos to limit memory usage.
                    states = np.vstack([demos[i]['demoX'] for i in self.train_idx])
                    states = states.reshape(-1, len(self.state_idx))
                    # 1e-3 to avoid infs if some state dimensions don't change in the
                    # first batch of samples
                    self.scale = np.diag(
                        1.0 / np.maximum(np.std(states, axis=0), 1e-3))
                    self.bias = - np.mean(
                        states.dot(self.scale), axis=0)
                    # Save the scale and bias.
                    with open('data/scale_and_bias_%s.pkl' % FLAGS.experiment, 'wb') as f:
                        pickle.dump({'scale': self.scale, 'bias': self.bias}, f)
                for key in demos.keys():
                    demos[key]['demoX'] = demos[key]['demoX'].reshape(-1, len(self.state_idx))
                    demos[key]['demoX'] = demos[key]['demoX'].dot(self.scale) + self.bias
                    demos[key]['demoX'] = demos[key]['demoX'].reshape(-1, self.T, len(self.state_idx))
        if not noisy:
            self.demos = demos
        else:
            self.noisy_demos = demos
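
The normalization above whitens each state dimension: scale is a diagonal
matrix of inverse standard deviations (clipped at 1e-3 so near-constant
dimensions don't produce infs), and bias shifts the scaled states to zero
mean, so demoX.dot(scale) + bias has roughly zero mean and unit variance
per dimension. A minimal standalone sketch with toy data (the shapes and
values are illustrative, not from the original dataset):

import numpy as np

# Toy stand-in for the stacked training states: (num samples, state dim).
states = 3.0 + np.random.randn(1000, 20) * np.linspace(1e-6, 5.0, 20)

# Same recipe as above: per-dimension inverse std on the diagonal,
# clipped at 1e-3, then a bias that centers the scaled states.
scale = np.diag(1.0 / np.maximum(np.std(states, axis=0), 1e-3))
bias = -np.mean(states.dot(scale), axis=0)

normalized = states.dot(scale) + bias
print(normalized.mean(axis=0))  # ~0 in every dimension
print(normalized.std(axis=0))   # ~1, except for the clipped dimension
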
Example #2
 def extract_supervised_data(self, demo_file, noisy=False):
     """
         Load the states and actions of the demos into memory.
         Args:
             demo_file: list of demo files, where each file contains an expert's states and actions for one task.
     """
     demos = extract_demo_dict(demo_file)
     # We don't need the whole dataset of simulated pushing.
     if FLAGS.experiment == 'sim_push':
         for key in demos.keys():
             demos[key]['demoX'] = demos[key]['demoX'][6:-6, :, :].copy()
             demos[key]['demoU'] = demos[key]['demoU'][6:-6, :, :].copy()
     n_folders = len(demos)
     N_demos = sum(demo['demoX'].shape[0] for demo in demos.values())
     self.state_idx = range(demos[0]['demoX'].shape[-1])
     self._dU = demos[0]['demoU'].shape[-1]
     print "Number of demos: %d" % N_demos
     idx = np.arange(n_folders)
     if FLAGS.train:
         n_val = FLAGS.val_set_size  # number of demo folders held out for validation
         if not hasattr(self, 'train_idx'):
             if n_val != 0:
                 if not FLAGS.shuffle_val:
                     self.val_idx = idx[-n_val:]
                     self.train_idx = idx[:-n_val]
                 else:
                     self.val_idx = np.sort(
                         np.random.choice(idx, size=n_val, replace=False))
                     mask = np.array([(i in self.val_idx) for i in idx])
                     self.train_idx = np.sort(idx[~mask])
             else:
                 self.train_idx = idx
                 self.val_idx = []
         # Normalize the states if it's training.
         with Timer('Normalizing states'):
             if self.scale is None or self.bias is None:
                 # Stack only the training demos to limit memory usage.
                 states = np.vstack(
                     [demos[i]['demoX'] for i in self.train_idx])
                 states = states.reshape(-1, len(self.state_idx))
                 # 1e-3 to avoid infs if some state dimensions don't change in the
                 # first batch of samples
                 self.scale = np.diag(
                     1.0 / np.maximum(np.std(states, axis=0), 1e-3))
                 self.bias = -np.mean(states.dot(self.scale), axis=0)
                 # Save the scale and bias.
                 with open('data/scale_and_bias_%s.pkl' % FLAGS.experiment,
                           'wb') as f:
                     pickle.dump({
                         'scale': self.scale,
                         'bias': self.bias
                     }, f)
             for key in demos.keys():
                 demos[key]['demoX'] = demos[key]['demoX'].reshape(
                     -1, len(self.state_idx))
                 demos[key]['demoX'] = demos[key]['demoX'].dot(
                     self.scale) + self.bias
                 demos[key]['demoX'] = demos[key]['demoX'].reshape(
                     -1, self.T, len(self.state_idx))
     if not noisy:
         self.demos = demos
     else:
         self.noisy_demos = demos
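
All three examples share the same train/validation split over demo folders:
either the last val_set_size folders are held out deterministically, or a
random subset is sampled and training uses the complement. A hypothetical
standalone helper that mirrors that logic (np.isin is a vectorized
equivalent of the membership-test list comprehension above):

import numpy as np

def split_train_val(n_folders, n_val, shuffle_val=False):
    """Return (train_idx, val_idx) over folder indices 0..n_folders-1."""
    idx = np.arange(n_folders)
    if n_val == 0:
        return idx, np.array([], dtype=int)
    if not shuffle_val:
        # Deterministic split: hold out the last n_val folders.
        return idx[:-n_val], idx[-n_val:]
    # Random split: sample n_val folders, train on the complement.
    val_idx = np.sort(np.random.choice(idx, size=n_val, replace=False))
    mask = np.isin(idx, val_idx)
    return idx[~mask], val_idx

train_idx, val_idx = split_train_val(n_folders=769, n_val=76, shuffle_val=True)
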
Example #3
 def extract_supervised_data(self, demo_file, noisy=False):
     """
         Load the states and actions of the demos into memory.
         Args:
             demo_file: list of demo files, where each file contains an expert's states and actions for one task.
     """
     # demos maps key -> data; each entry has the fields 'xml', 'demoX', 'demoU'
     # demoX: (24, 100, 20), demoU: (24, 100, 7),
     # i.e. (num demos per task, time horizon, state/action dim),
     # with 100-timestep episodes
     demos = extract_demo_dict(demo_file)
     # We don't need the whole dataset of simulated pushing.
     if FLAGS.experiment == 'sim_push':
         for key in demos.keys():
             demos[key]['demoX'] = demos[key]['demoX'][6:-6, :, :].copy()
             demos[key]['demoU'] = demos[key]['demoU'][6:-6, :, :].copy()
     # each .pkl file is treated as one folder; keys run 0, ..., n_folders - 1
     n_folders = len(demos)
     # each leading slice of demoX counts as one demo
     N_demos = sum(demo['demoX'].shape[0] for demo in demos.values())
     # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
     self.state_idx = range(demos[0]['demoX'].shape[-1])
     # self._dU = 7
     self._dU = demos[0]['demoU'].shape[-1]
     print "Number of demos: %d" % N_demos
     # index array over the folders, i.e. the .pkl files of state/action data
     idx = np.arange(n_folders)
     if FLAGS.train:
         n_val = FLAGS.val_set_size  # number of demo folders held out for validation
         if not hasattr(self, 'train_idx'):
             if n_val != 0:
                 if not FLAGS.shuffle_val:
                     # take the last n_val elems
                     self.val_idx = idx[-n_val:]
                     # take the rest: the indices of the pickle files used for training
                     self.train_idx = idx[:-n_val]
                 else:
                     # randomly choose val index
                     self.val_idx = np.sort(
                         np.random.choice(idx, size=n_val, replace=False))
                     mask = np.array([(i in self.val_idx) for i in idx])
                     # train on the complement of val_idx
                     self.train_idx = np.sort(idx[~mask])
             else:
                 self.train_idx = idx
                 self.val_idx = []
         # Normalize the states if it's training.
         with Timer('Normalizing states'):
             if self.scale is None or self.bias is None:
                 # stack all the training states, e.g. (18456, 100, 20),
                 # where 18456 = len(self.train_idx) * num demos per task;
                 # only the training demos are stacked, to limit memory usage
                 states = np.vstack(
                     [demos[i]['demoX'] for i in self.train_idx])
                 # flatten to (num train demos * T, state dim) just to
                 # compute the scale and bias
                 states = states.reshape(-1, len(self.state_idx))
                 # 1e-3 to avoid infs if some state dimensions don't change in the
                 # first batch of samples
                 self.scale = np.diag(
                     1.0 / np.maximum(np.std(states, axis=0), 1e-3))
                 self.bias = -np.mean(states.dot(self.scale), axis=0)
                 # Save the scale and bias.
                 with open('data/scale_and_bias_%s.pkl' % FLAGS.experiment,
                           'wb') as f:
                     pickle.dump({
                         'scale': self.scale,
                         'bias': self.bias
                     }, f)
             for key in demos.keys():
                 demos[key]['demoX'] = demos[key]['demoX'].reshape(
                     -1, len(self.state_idx))
                 demos[key]['demoX'] = demos[key]['demoX'].dot(
                     self.scale) + self.bias
                 # reshape back to (num demos, T, state dim)
                 demos[key]['demoX'] = demos[key]['demoX'].reshape(
                     -1, self.T, len(self.state_idx))
     if not noisy:
         self.demos = demos
     else:
         self.noisy_demos = demos
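
Since the methods above pickle the scale and bias under
data/scale_and_bias_<experiment>.pkl, the matching read side at test time is
straightforward. A sketch with toy data (not the author's evaluation code;
'sim_push' is just the experiment name seen above):

import pickle
import numpy as np

# Load the normalization saved during training.
with open('data/scale_and_bias_sim_push.pkl', 'rb') as f:
    norm = pickle.load(f)
scale, bias = norm['scale'], norm['bias']

# Apply the same affine transform to new demos, mirroring the loop above:
# flatten to (num demos * T, state dim), normalize, reshape back.
demoX = np.random.randn(12, 100, 20)  # toy (num demos, T, state dim)
flat = demoX.reshape(-1, demoX.shape[-1])
demoX = (flat.dot(scale) + bias).reshape(demoX.shape)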