def iteration(self):
    """Collect one batch of noisy rollouts, preprocess it, and apply an update.

    Returns whatever ``self.update`` returns (the per-iteration learning value).
    """
    _, observations, actions, rewards = do_rollouts(
        self.env, self.noisy_pol, self.num_trajs, self.num_steps,
        seeds=np.arange(self.num_trajs))

    predictor = self.servoing_pol.predictor

    def _preprocess_obs(obs):
        # Run the predictor's preprocessing on every image observation,
        # replacing the raw images in the observation dict.
        for name in obs:
            if name.endswith('image'):
                preprocessed, = predictor.preprocess([obs[name]])
                obs[name] = preprocessed
        return obs

    preprocess_action = predictor.transformers['action'].preprocess

    observations = [list(map(_preprocess_obs, traj_obs))
                    for traj_obs in observations]
    actions = [list(map(preprocess_action, traj_actions))
               for traj_actions in actions]
    # Negate rewards so the update operates on costs.
    rewards = [[-r for r in traj_rewards] for traj_rewards in rewards]

    observations, observations_p = split_observations(observations)
    return self.update(observations, actions, rewards, observations_p)
def _noisy_evaluation(self, theta):
    """Evaluate the noisy policy with the given servoing parameters.

    Temporarily installs ``theta`` into the servoing policy, runs rollouts,
    and returns the mean undiscounted return.

    Fix: the parameter restore is now in a ``finally`` block so the policy's
    original theta is restored even if ``do_rollouts`` raises (previously an
    exception would leave the trial parameters installed).
    """
    # Save servoing parameters.
    curr_theta = self.servoing_pol.theta.copy()
    self.servoing_pol.theta = theta
    try:
        rewards = do_rollouts(self.env, self.noisy_pol, self.num_trajs,
                              self.num_steps,
                              seeds=np.arange(self.num_trajs),
                              ret_rewards_only=True)
    finally:
        # Restore servoing parameters no matter what happened above.
        self.servoing_pol.theta = curr_theta
    # gamma = 1.0: undiscounted returns are used for the noisy evaluation.
    return np.mean(discount_returns(rewards, 1.0))
def run(self):
    """Main training loop.

    Runs ``self.sampling_iters`` learning iterations. Each pass optionally
    validates the current (noise-free) servoing policy, performs one
    learning iteration, updates plots, and takes periodic snapshots.
    """
    if self.plot:
        fig_plotters = self.visualization_init()
        # First element is the figure; the rest are the plotter objects.
        fig, plotters = fig_plotters[0], fig_plotters[1:]
    # Note: <= so that the final iteration is validated/snapshotted but
    # (see below) no learning update is performed on it.
    while self.iter_ <= self.sampling_iters:
        print("Iteration {} of {}".format(self.iter_, self.sampling_iters))
        # Record the parameters used at this iteration.
        self.thetas.append(self.servoing_pol.theta.copy())
        if not self.skip_validation:
            # Evaluate the deterministic servoing policy (not the noisy one).
            rewards = do_rollouts(self.env, self.servoing_pol,
                                  self.num_trajs, self.num_steps,
                                  seeds=np.arange(self.num_trajs),
                                  ret_rewards_only=True)
            # Undiscounted returns, with standard error of the mean.
            returns = discount_returns(rewards, 1.0)
            mean_return = np.mean(returns)
            std_return = np.std(returns) / np.sqrt(len(returns))
            self.mean_returns.append(mean_return)
            self.std_returns.append(std_return)
            # Discounted returns using the algorithm's gamma.
            discounted_returns = discount_returns(rewards, self.gamma)
            mean_discounted_return = np.mean(discounted_returns)
            std_discounted_return = np.std(discounted_returns) / np.sqrt(
                len(discounted_returns))
            self.mean_discounted_returns.append(mean_discounted_return)
            self.std_discounted_returns.append(std_discounted_return)
            print(" mean discounted return = {:.6f} ({:.6f})".format(
                mean_discounted_return, std_discounted_return))
            print(" mean return = {:.6f} ({:.6f})".format(
                mean_return, std_return))
        # Strictly less-than: no learning update on the very last iteration,
        # so the final validation reflects the last update's parameters.
        if self.iter_ < self.sampling_iters:
            learning_value = self.iteration()
            self.learning_values.append(np.asarray(learning_value))
        if self.plot:
            self.visualization_update(*plotters)
            learning_fig_fname = self.get_snapshot_fname('_learning.pdf')
            fig.savefig(learning_fig_fname)
        # Snapshot periodically and always on the final iteration.
        if self.snapshot_interval and (
                self.iter_ % self.snapshot_interval == 0 or
                self.iter_ == self.sampling_iters):
            self.snapshot()
        self.iter_ += 1
def main():
    """Run visual-servoing rollouts with a feature predictor loaded from yaml.

    Fix: ``image_visualizer`` was only bound inside ``if args.visualize:``
    but was passed to ``do_rollouts`` unconditionally, raising NameError
    whenever visualization was off. It now defaults to ``None`` (matching
    the sibling validation script in this file).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('predictor_fname', type=str)
    parser.add_argument('--output_dir', '-o', type=str, default=None)
    parser.add_argument('--num_trajs', '-n', type=int, default=10, metavar='N',
                        help='total number of data points is N*T')
    parser.add_argument('--num_steps', '-t', type=int, default=10, metavar='T',
                        help='number of time steps per trajectory')
    parser.add_argument('--gamma', type=float, default=0.9)
    parser.add_argument('--visualize', '-v', type=int, default=None)
    parser.add_argument('--record_file', '-r', type=str, default=None)
    parser.add_argument('--target_distance', '-d', type=int, default=0)
    parser.add_argument('--feature_inds', '-i', type=str,
                        help='inds of subset of features to use')
    parser.add_argument('--w_init', type=float, default=1.0)
    parser.add_argument('--lambda_init', type=float, default=1.0)
    args = parser.parse_args()

    with open(args.predictor_fname) as predictor_file:
        # NOTE(review): yaml.load without an explicit Loader — these configs
        # appear to rely on custom tags (the 'class' entry holds a Python
        # class), so safe_load would break them; only load trusted files.
        predictor_config = yaml.load(predictor_file)
    if issubclass(predictor_config['environment_config']['class'],
                  envs.Panda3dEnv):
        transfer_image_transformer(predictor_config)
    predictor = from_config(predictor_config)
    if args.feature_inds:
        # NOTE(review): iterating the raw string means only single-digit
        # indices are representable (e.g. "02" -> [0, 2]) — TODO confirm
        # intended; matches the sibling script's behavior.
        args.feature_inds = [int(ind) for ind in args.feature_inds]
        predictor.feature_name = [
            predictor.feature_name[ind] for ind in args.feature_inds
        ]
        predictor.next_feature_name = [
            predictor.next_feature_name[ind] for ind in args.feature_inds
        ]
    if issubclass(predictor.environment_config['class'], envs.RosEnv):
        import rospy
        rospy.init_node("visual_servoing")
    env = from_config(predictor.environment_config)
    if not isinstance(env, ServoingEnv):
        env = ServoingEnv(env, max_time_steps=args.num_steps)
    pol = policies.ServoingPolicy(predictor, alpha=1.0,
                                  lambda_=args.lambda_init, w=args.w_init)

    # Recording implies visualization.
    if args.record_file and not args.visualize:
        args.visualize = 1
    image_visualizer = None  # fix: defined even when not visualizing
    if args.visualize:
        image_visualizer = FeaturePredictorServoingImageVisualizer(
            predictor, visualize=args.visualize)
    do_rollouts(env, pol, args.num_trajs, args.num_steps,
                target_distance=args.target_distance,
                output_dir=args.output_dir,
                image_visualizer=image_visualizer,
                record_file=args.record_file,
                gamma=args.gamma,
                verbose=True)
def main():
    """Load trained servoing algorithm(s), pick the best parameters, and
    evaluate them with rollouts, optionally dumping observations and CSV
    summaries of the (discounted) returns.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('algorithm_fname', type=str, nargs='*')
    parser.add_argument('--start_traj_iter', '-s', type=int, default=0)
    parser.add_argument('--num_trajs', '-n', type=int, default=100,
                        metavar='N', help='number of trajectories')
    parser.add_argument('--num_steps', '-t', type=int, default=100,
                        metavar='T',
                        help='maximum number of time steps per trajectory')
    parser.add_argument('--visualize', '-v', type=int, default=0)
    parser.add_argument('--record_file', '-r', type=str)
    parser.add_argument('--reset_states_fname', type=str)
    parser.add_argument('--verbose', action='store_true')
    parser.add_argument('--output_fname', '-o', type=str)
    parser.add_argument('--observations_dir', '-d', type=str)
    parser.add_argument('--use_last', action='store_true')
    args = parser.parse_args()

    # Load every candidate algorithm config and keep the one whose best
    # mean return is highest.
    algorithm_configs = []
    for algorithm_fname in args.algorithm_fname:
        with open(algorithm_fname) as algorithm_file:
            algorithm_config = yaml.load(algorithm_file,
                                         Loader=Python2to3Loader)
        algorithm_configs.append(algorithm_config)
    # NOTE(review): if two configs tie on best return, max() falls back to
    # comparing the config dicts, which raises TypeError on Python 3 —
    # assumed not to happen in practice; TODO confirm.
    best_return, algorithm_config = max(
        zip([
            max(algorithm_config['mean_returns'])
            for algorithm_config in algorithm_configs
        ], algorithm_configs))
    if args.reset_states_fname is None:
        # No fixed reset states: let the environment reset itself per seed.
        reset_states = [None] * args.num_trajs
    else:
        with open(args.reset_states_fname, 'r') as reset_state_file:
            reset_state_config = from_yaml(reset_state_file)
        # Make the evaluation environment use the same car models as the
        # recorded reset states.
        algorithm_config['env']['env']['car_model_names'] = reset_state_config[
            'environment_config']['car_model_names']
        reset_states = reset_state_config['reset_states']
        # Can't run more trajectories than there are reset states.
        args.num_trajs = min(args.num_trajs, len(reset_states))
    alg = from_config(algorithm_config)
    env = alg.env
    servoing_pol = alg.servoing_pol

    # Choose which iteration's parameters to evaluate.
    if args.use_last:
        print("using parameters of the last iteration")
        best_return, best_theta = list(zip(alg.mean_returns, alg.thetas))[-1]
    else:
        print("using parameters based on best returns")
        best_return, best_theta = max(zip(alg.mean_returns, alg.thetas))
    print(best_return)
    servoing_pol.theta = best_theta
    print(servoing_pol.theta)
    if args.output_fname:
        import csv
        # Append the chosen return and parameters to the output CSV.
        with open(args.output_fname, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerow([str(best_return)])
            writer.writerow(map(str, servoing_pol.theta))

    image_transformer = extract_image_transformer(
        dict(servoing_pol.predictor.transformers))
    image_visualizer = FeaturePredictorServoingImageVisualizer(
        servoing_pol.predictor, visualize=args.visualize)
    if args.observations_dir:
        # Full rollouts so observations can be saved to a data container.
        _, observations, _, rewards = do_rollouts(
            env, servoing_pol, args.num_trajs, args.num_steps,
            image_visualizer=image_visualizer, verbose=args.verbose,
            seeds=np.arange(args.num_trajs), reset_states=reset_states,
            cv2_record_file=args.record_file,
            image_transformer=image_transformer)
        container = None
        for traj_iter, observations_ in enumerate(observations):
            for step_iter, obs in enumerate(observations_):
                if container is None:
                    # Lazily create the container using the first
                    # observation's keys as the datum names.
                    container = ImageDataContainer(args.observations_dir, 'x')
                    container.reserve(list(obs.keys()),
                                      (args.num_trajs, args.num_steps + 1))
                container.add_datum(traj_iter, step_iter, **obs)
        container.close()
    else:
        rewards = do_rollouts(env, servoing_pol, args.num_trajs,
                              args.num_steps,
                              image_visualizer=image_visualizer,
                              verbose=args.verbose,
                              seeds=np.arange(args.num_trajs),
                              reset_states=reset_states,
                              cv2_record_file=args.record_file,
                              image_transformer=image_transformer,
                              ret_rewards_only=True)
    if args.output_fname:
        import csv
        # Append per-trajectory returns plus mean and standard error,
        # first discounted then undiscounted.
        discounted_returns = discount_returns(rewards, alg.gamma)
        returns = discount_returns(rewards, 1.0)
        with open(args.output_fname, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerow(map(str, discounted_returns))
            writer.writerow([
                str(np.mean(discounted_returns)),
                str(np.std(discounted_returns) /
                    np.sqrt(len(discounted_returns)))
            ])
            writer.writerow(map(str, returns))
            writer.writerow([
                str(np.mean(returns)),
                str(np.std(returns) / np.sqrt(len(returns)))
            ])
def main():
    """Grid-search over w/lambda initializations, printing and optionally
    CSV-appending the mean discounted return for each combination.

    Fixes: (1) the per-row CSV write appended ``discounted_returns`` twice
    (once via ``row_values + list(discounted_returns)`` and again via a
    second comprehension) — it is now written once, matching the parallel
    writer in the summary script; (2) ``csv`` is imported locally, as the
    other entry points in this file do, so the writer does not NameError.
    """
    import csv

    parser = argparse.ArgumentParser()
    parser.add_argument('predictor_fname', type=str)
    parser.add_argument('--num_trajs', '-n', type=int, default=10, metavar='N',
                        help='total number of data points is N*T')
    parser.add_argument('--num_steps', '-t', type=int, default=10, metavar='T',
                        help='number of time steps per trajectory')
    parser.add_argument('--gamma', type=float, default=0.9)
    parser.add_argument('--visualize', '-v', type=int, default=None)
    parser.add_argument('--target_distance', '-d', type=int, default=0)
    parser.add_argument('--feature_inds', '-i', type=str,
                        help='inds of subset of features to use')
    parser.add_argument('--w_inits', nargs='+', type=float,
                        default=list(range(1, 101)))
    parser.add_argument('--lambda_inits', nargs='+', type=float, default=[1.0])
    parser.add_argument('--output_fname', '-o', type=str)
    args = parser.parse_args()

    with open(args.predictor_fname) as predictor_file:
        # NOTE(review): yaml.load without an explicit Loader — configs rely
        # on custom tags; only load trusted files.
        predictor_config = yaml.load(predictor_file)
    if issubclass(predictor_config['environment_config']['class'],
                  envs.Panda3dEnv):
        transfer_image_transformer(predictor_config)
    predictor = from_config(predictor_config)
    if args.feature_inds:
        # NOTE(review): iterating the raw string limits this to single-digit
        # feature indices — TODO confirm intended.
        args.feature_inds = [int(ind) for ind in args.feature_inds]
        predictor.feature_name = [predictor.feature_name[ind]
                                  for ind in args.feature_inds]
        predictor.next_feature_name = [predictor.next_feature_name[ind]
                                       for ind in args.feature_inds]
    if issubclass(predictor.environment_config['class'], envs.RosEnv):
        import rospy
        rospy.init_node("validate_visual_servoing")
    env = from_config(predictor.environment_config)
    if not isinstance(env, ServoingEnv):
        env = ServoingEnv(env, max_time_steps=args.num_steps)
    if args.visualize:
        image_visualizer = FeaturePredictorServoingImageVisualizer(
            predictor, visualize=args.visualize)
    else:
        image_visualizer = None

    header_format = '{:>15}\t{:>15}\t{:>15}\t{:>15}'
    row_format = '{:>15.2f}\t{:>15.2f}\t{:>15.4f}\t{:>15.4f}'
    print(header_format.format('w_init', 'lambda_init', 'mean',
                               'standard error'))
    print('=' * 60)
    if args.output_fname:
        with open(args.output_fname, 'a') as csvfile:
            writer = csv.writer(csvfile, delimiter='\t', quotechar='|',
                                quoting=csv.QUOTE_MINIMAL)
            writer.writerow(['w_init', 'lambda_init', 'mean',
                             'standard error'])
    for w_init in args.w_inits:
        for lambda_init in args.lambda_inits:
            pol = policies.ServoingPolicy(predictor, alpha=1.0,
                                          lambda_=lambda_init, w=w_init)
            _, _, _, rewards = do_rollouts(
                env, pol, args.num_trajs, args.num_steps,
                target_distance=args.target_distance,
                image_visualizer=image_visualizer,
                gamma=args.gamma,
                seeds=np.arange(args.num_trajs))
            discounted_returns = discount_returns(rewards, args.gamma)
            # [w, lambda, mean discounted return, standard error]
            row_values = [
                w_init, lambda_init,
                np.mean(discounted_returns),
                np.std(discounted_returns) / np.sqrt(len(discounted_returns))
            ]
            print(row_format.format(*row_values))
            if args.output_fname:
                with open(args.output_fname, 'a') as csvfile:
                    writer = csv.writer(csvfile, delimiter='\t',
                                        quotechar='|',
                                        quoting=csv.QUOTE_MINIMAL)
                    # Row values followed by the per-trajectory discounted
                    # returns (written once — previously duplicated).
                    writer.writerow([
                        str(value)
                        for value in row_values + list(discounted_returns)
                    ])
def main():
    """Learn visual-servoing weights: build env/policy/algorithm from yaml
    configs, optionally warm-start from a previous run, train, and
    optionally visualize rollouts with the learned parameters.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('predictor_fname', type=str)
    parser.add_argument('algorithm_fname', type=str)
    parser.add_argument('--algorithm_init_fname', type=str, default=None)
    parser.add_argument('--output_dir', '-o', type=str, default=None)
    parser.add_argument('--visualize', '-v', type=int, default=None)
    parser.add_argument('--record_file', '-r', type=str, default=None)
    parser.add_argument('--cv2_record_file', type=str, default=None)
    # NOTE(review): nargs='+' with a scalar default — args.w_init is a float
    # unless the flag is passed, in which case it is a list; presumably the
    # policy accepts both — TODO confirm.
    parser.add_argument('--w_init', type=float, nargs='+', default=1.0)
    parser.add_argument('--lambda_init', type=float, nargs='+', default=1.0)
    args = parser.parse_args()

    with open(args.predictor_fname) as predictor_file:
        predictor_config = yaml.load(predictor_file, Loader=Python2to3Loader)
    predictor = from_config(predictor_config)
    if issubclass(predictor.environment_config['class'], envs.RosEnv):
        import rospy
        rospy.init_node("learn_visual_servoing")
    env = from_config(predictor.environment_config)
    if not isinstance(env, ServoingEnv):
        env = ServoingEnv(env)
    servoing_pol = policies.TheanoServoingPolicy(
        predictor, alpha=1.0, lambda_=args.lambda_init, w=args.w_init)

    with open(args.algorithm_fname) as algorithm_file:
        algorithm_config = yaml.load(algorithm_file, Loader=Python2to3Loader)
    # The algorithm config is completed with live objects before
    # instantiation.
    algorithm_config['env'] = env
    algorithm_config['servoing_pol'] = servoing_pol
    if 'snapshot_prefix' not in algorithm_config:
        # Default snapshot prefix: <predictor dir>/<algorithm name>/
        # <timestamp>_<random id>/ (trailing '' yields the trailing slash).
        snapshot_prefix_paths = [
            os.path.split(args.predictor_fname)[0],
            os.path.splitext(os.path.split(args.algorithm_fname)[1])[0]
        ]
        now = datetime.datetime.now(dateutil.tz.tzlocal())
        rand_id = str(uuid.uuid4())[:5]
        timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
        snapshot_prefix_paths.append('%s_%s' % (timestamp, rand_id))
        snapshot_prefix_paths.append('')
        snapshot_prefix = os.path.join(*snapshot_prefix_paths)
        algorithm_config['snapshot_prefix'] = snapshot_prefix
    alg = from_config(algorithm_config)

    # TODO: concatenate an arbitrary number of algorithms
    if args.algorithm_init_fname:
        # Warm-start from the best-performing parameters of a previous run.
        with open(args.algorithm_init_fname) as algorithm_init_file:
            algorithm_init_config = yaml.load(algorithm_init_file,
                                              Loader=Python2to3Loader)
        print("using parameters based on best returns")
        best_return, best_theta = max(
            zip(algorithm_init_config['mean_returns'],
                algorithm_init_config['thetas']))
        print(best_return)
        # servoing_pol.theta = best_theta
        # theta is the concatenation [w, lambda_]; split it by the length
        # of the current lambda_ vector.
        servoing_pol.w = best_theta[:-len(servoing_pol.lambda_)]
        servoing_pol.lambda_ = best_theta[-len(servoing_pol.lambda_):]
        print(servoing_pol.theta)

    alg.run()
    print(servoing_pol.theta)

    # Recording implies visualization.
    if args.record_file and not args.visualize:
        args.visualize = 1
    if args.visualize:
        image_visualizer = FeaturePredictorServoingImageVisualizer(
            predictor, visualize=args.visualize)
        # Final demonstration rollouts with the learned parameters.
        do_rollouts(env, servoing_pol, alg.num_trajs, alg.num_steps,
                    output_dir=args.output_dir,
                    image_visualizer=image_visualizer,
                    record_file=args.record_file,
                    verbose=True,
                    gamma=alg.gamma)
def main():
    """Print, per algorithm yaml file, the best and all per-iteration mean
    discounted returns as one tab-separated line.

    Fix: removed the unreachable block after ``return`` — it was dead code
    copied from the w/lambda grid-search script and referenced names that do
    not exist in this function (``args.output_fname``, ``args.w_inits``,
    ``predictor``, ``env``, ...), so it could never run and could never work.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('algorithm_fnames', nargs='+', type=str)
    args = parser.parse_args()

    for algorithm_fname in args.algorithm_fnames:
        with open(algorithm_fname) as algorithm_file:
            algorithm_config = yaml.load(algorithm_file)
        mean_discounted_returns = algorithm_config['mean_discounted_returns']
        # One row per file: filename, best return, then every iteration's
        # mean discounted return.
        row_values = ([algorithm_fname, max(mean_discounted_returns)] +
                      mean_discounted_returns)
        print('\t'.join([str(value) for value in row_values]))