def main():
    """
    Serve this dashboard script with bokeh on a free local port.

    A ``bokeh serve`` shell command is assembled for the current file and handed to
    ``os.system``. The options are read from the module-level ``args``:

    * ``allow_remote_connection`` appends ``--allow-websocket-origin=*`` and requires that an
      experiment dir or experiment files were given on the command line.
    * ``experiment_dir`` / ``experiment_files`` are forwarded to the served script after ``--args``.

    :raises ValueError: if remote connections are allowed but neither an experiment dir nor
                        experiment files were supplied
    """
    from rl_coach.utils import get_open_port

    script_path = os.path.realpath(__file__)
    free_port = get_open_port()

    # the command is collected as fragments and glued together once at the end
    fragments = ['bokeh serve --show {path} --port {port}'.format(path=script_path, port=free_port)]

    has_experiment = bool(args.experiment_dir or args.experiment_files)

    if args.allow_remote_connection:
        # when allowing remote connection, selecting an experiment or a file via the GUI buttons
        # does not seem to work well from remote, so an experiment must come from the command line
        if not has_experiment:
            raise ValueError(
                "The allow_remote_connection flag only works in conjunction with either the experiment_dir"
                " or the experiment_files flag. ")

        # allow-websocket-origin = * allows connections from a remote machine
        fragments.append(' --allow-websocket-origin=*')

    if has_experiment:
        # everything after --args is passed through to the served script
        fragments.append(' --args')
        if args.experiment_dir:
            fragments.append(' --experiment_dir {}'.format(args.experiment_dir))
        if args.experiment_files:
            fragments.append(' --experiment_files {}'.format(args.experiment_files))

    os.system(''.join(fragments))
def main():
    """
    Serve this dashboard script with bokeh on a free local port.

    The ``bokeh serve`` shell command is run through ``os.system``. When the module-level
    ``args`` carries an experiment dir and/or experiment files, they are forwarded to the
    served script after ``--args``.
    """
    from rl_coach.utils import get_open_port

    script_path = os.path.realpath(__file__)
    command = 'bokeh serve --show {} --port {}'.format(script_path, get_open_port())

    # collect the optional script arguments first; --args is only added when there are any
    script_args = []
    if args.experiment_dir:
        script_args.append(' --experiment_dir {}'.format(args.experiment_dir))
    if args.experiment_files:
        script_args.append(' --experiment_files {}'.format(args.experiment_files))

    if script_args:
        command = command + ' --args' + ''.join(script_args)

    os.system(command)
def start_multi_threaded(graph_manager: 'GraphManager', args: argparse.Namespace):
    """
    Run a distributed session as a set of local processes.

    One parameter-server process and ``args.num_workers`` training worker processes are spawned.
    When ``args.evaluation_worker`` or ``args.render`` is set, one additional evaluation worker
    is spawned as well. The call blocks until all the training workers have finished and then
    terminates the evaluation worker (if one was started).

    :param graph_manager: the graph manager handed to ``start_graph`` in every spawned process
    :param args: the parsed command-line arguments
    :raises ValueError: if ``args.checkpoint_restore_file`` is set, since only restoring from a
                        directory is supported here
    """
    # validate before anything is spawned, so a bad configuration does not leave a manager
    # process behind
    if args.checkpoint_restore_file:
        raise ValueError("Multi-Process runs only support restoring checkpoints from a directory, "
                         "and not from a file. ")

    # The evaluation worker is spawned for either flag, so it has to be counted (and later
    # terminated) for either flag too. Otherwise its task index has no matching worker host.
    with_evaluation_worker = bool(args.evaluation_worker or args.render)

    total_tasks = args.num_workers
    if with_evaluation_worker:
        total_tasks += 1

    ps_hosts = "localhost:{}".format(get_open_port())
    worker_hosts = ",".join("localhost:{}".format(get_open_port()) for _ in range(total_tasks))

    # Shared memory
    class CommManager(BaseManager):
        pass

    CommManager.register('SharedMemoryScratchPad', SharedMemoryScratchPad,
                         exposed=['add', 'get', 'internal_call'])
    comm_manager = CommManager()
    comm_manager.start()
    shared_memory_scratchpad = comm_manager.SharedMemoryScratchPad()

    def start_distributed_task(job_type, task_index, evaluation_worker=False,
                               shared_memory_scratchpad=shared_memory_scratchpad):
        """
        Spawn a single process running ``start_graph`` for the given job and return it.

        :param job_type: the job name passed on to the task parameters ("ps" or "worker")
        :param task_index: the index of the task within its job
        :param evaluation_worker: True if the task is the evaluation worker
        :param shared_memory_scratchpad: the scratchpad proxy shared between the tasks
        :return: the started ``Process``
        """
        task_parameters = DistributedTaskParameters(
            framework_type=args.framework,
            parameters_server_hosts=ps_hosts,
            worker_hosts=worker_hosts,
            job_type=job_type,
            task_index=task_index,
            # 0 value for evaluation worker as it should run infinitely
            evaluate_only=0 if evaluation_worker else None,
            use_cpu=args.use_cpu,
            num_tasks=total_tasks,  # training tasks + 1 evaluation task
            num_training_tasks=args.num_workers,
            experiment_path=args.experiment_path,
            shared_memory_scratchpad=shared_memory_scratchpad,
            # each worker gets a different seed
            seed=args.seed + task_index if args.seed is not None else None,
            checkpoint_save_secs=args.checkpoint_save_secs,
            # MonitoredTrainingSession only supports a dir
            checkpoint_restore_path=args.checkpoint_restore_dir,
            checkpoint_save_dir=args.checkpoint_save_dir,
            export_onnx_graph=args.export_onnx_graph,
            apply_stop_condition=args.apply_stop_condition
        )

        # we assume that only the evaluation workers are rendering
        graph_manager.visualization_parameters.render = args.render and evaluation_worker

        process = Process(target=start_graph, args=(graph_manager, task_parameters))
        process.start()
        return process

    # parameter server
    # NOTE(review): this process is neither joined nor terminated here - confirm it exits on its own
    parameter_server = start_distributed_task("ps", 0)

    # training workers
    # wait a bit before spawning the non chief workers in order to make sure the session is
    # already created
    workers = [start_distributed_task("worker", 0)]
    time.sleep(2)
    for task_index in range(1, args.num_workers):
        workers.append(start_distributed_task("worker", task_index))

    # evaluation worker
    evaluation_process = None
    if with_evaluation_worker:
        evaluation_process = start_distributed_task("worker", args.num_workers, evaluation_worker=True)

    # wait for all workers
    for worker in workers:
        worker.join()

    # the evaluation worker runs infinitely, so it has to be stopped explicitly whenever it exists
    if evaluation_process is not None:
        evaluation_process.terminate()
def __init__(self, level: LevelSelection, seed: int, frame_skip: int, human_control: bool,
             custom_reward_threshold: Union[int, float], visualization_parameters: VisualizationParameters,
             server_height: int, server_width: int, camera_height: int, camera_width: int,
             verbose: bool, experiment_suite: ExperimentSuite, config: str, episode_max_time: int,
             allow_braking: bool, quality: CarlaEnvironmentParameters.Quality,
             cameras: List[CameraTypes], weather_id: List[int], experiment_path: str,
             separate_actions_for_throttle_and_brake: bool,
             num_speedup_steps: int, max_speed: float, **kwargs):
    """
    Store the configuration, open the CARLA server, connect a client to it and define the
    state and action spaces.

    :param level: the level name; its upper-cased form is looked up in CarlaLevel for the map
                  name and map path
    :param seed: forwarded to the base class and used as the vehicles / pedestrians seed of the
                 hard coded settings; when None, the settings seeds are randomized
    :param frame_skip: forwarded to the base class
    :param human_control: forwarded to the base class
    :param custom_reward_threshold: forwarded to the base class
    :param visualization_parameters: forwarded to the base class
    :param server_height: stored as the server height
    :param server_width: stored as the server width
    :param camera_height: height used for the cameras and the image observation shape
    :param camera_width: width used for the cameras and the image observation shape
    :param verbose: stored on the instance
    :param experiment_suite: when given, the conditions of its first experiment are loaded
                             instead of the environment settings
    :param config: path to a settings file; when given, its content replaces the hard coded settings
    :param episode_max_time: stored on the instance
    :param allow_braking: stored on the instance
    :param quality: its value is used as the quality level of the hard coded settings
    :param cameras: the cameras added to the hard coded settings
    :param weather_id: one of the given ids is picked at random for the hard coded settings
    :param experiment_path: stored on the instance
    :param separate_actions_for_throttle_and_brake: selects the 3 valued (steer, gas, brake) action
                                                    space instead of the 2 valued one
    :param num_speedup_steps: stored on the instance
    :param max_speed: stored on the instance
    :param kwargs: accepted and ignored by this constructor
    """
    super().__init__(level, seed, frame_skip, human_control, custom_reward_threshold, visualization_parameters)

    # ---- server configuration ----
    self.server_height = server_height
    self.server_width = server_width
    self.port = get_open_port()
    self.host = 'localhost'
    level_spec = CarlaLevel[level.upper()].value
    self.map_name = level_spec['map_name']
    self.map_path = level_spec['map_path']
    self.experiment_path = experiment_path

    # ---- client configuration ----
    self.verbose = verbose
    self.quality = quality
    self.cameras = cameras
    self.weather_id = weather_id
    self.episode_max_time = episode_max_time
    self.allow_braking = allow_braking
    self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake
    self.camera_width = camera_width
    self.camera_height = camera_height

    # ---- server settings ----
    self.experiment_suite = experiment_suite
    self.config = config
    if not self.config:
        # no settings file was given - fall back to the hard coded settings
        self.settings = CarlaSettings()
        hard_coded_settings = dict(
            SynchronousMode=True,
            SendNonPlayerAgentsInfo=False,
            NumberOfVehicles=15,
            NumberOfPedestrians=30,
            WeatherId=random.choice(force_list(self.weather_id)),
            QualityLevel=self.quality.value,
            SeedVehicles=seed,
            SeedPedestrians=seed,
        )
        self.settings.set(**hard_coded_settings)
        if seed is None:
            self.settings.randomize_seeds()

        self.settings = self._add_cameras(self.settings, self.cameras, self.camera_width, self.camera_height)
    else:
        # the settings are taken verbatim from the given file
        with open(self.config, 'r') as settings_file:
            self.settings = settings_file.read()

    # ---- open the server ----
    self.server = self._open_server()

    # silence log records up to and including the ERROR level
    logging.disable(logging.ERROR)

    # ---- open the client ----
    self.game = CarlaClient(self.host, self.port, timeout=99999999)
    self.game.connect()
    if not self.experiment_suite:
        self.scene = self.game.load_settings(self.settings)
    else:
        # start from the first experiment of the suite
        self.current_experiment_idx = 0
        self.current_experiment = self.experiment_suite.get_experiments()[self.current_experiment_idx]
        self.scene = self.game.load_settings(self.current_experiment.conditions)

    # available start positions
    self.positions = self.scene.player_start_spots
    self.num_positions = len(self.positions)
    self.current_start_position_idx = 0
    self.current_pose = 0

    # ---- state space: a measurements vector plus one image per scene sensor ----
    self.state_space = StateSpace({
        "measurements": VectorObservationSpace(4, measurements_names=["forward_speed", "x", "y", "z"])
    })
    image_shape = [self.camera_height, self.camera_width, 3]
    for sensor in self.scene.sensors:
        self.state_space[sensor.name] = ImageObservationSpace(shape=np.array(image_shape), high=255)

    # ---- action space ----
    if not self.separate_actions_for_throttle_and_brake:
        self.action_space = BoxActionSpace(shape=2, low=np.array([-1, -1]), high=np.array([1, 1]),
                                           descriptions=["steer", "gas_and_brake"])
    else:
        self.action_space = BoxActionSpace(shape=3, low=np.array([-1, 0, 0]), high=np.array([1, 1, 1]),
                                           descriptions=["steer", "gas", "brake"])

    # ---- human control ----
    if self.human_control:
        # convert continuous action space to discrete
        self.steering_strength = 0.5
        self.gas_strength = 1.0
        self.brake_strength = 0.5
        steer = self.steering_strength
        gas = self.gas_strength
        brake = self.brake_strength

        # TODO: reverse order of actions
        # NOTE(review): BRAKE uses -brake while BRAKE_AND_TURN_* use +brake - confirm the
        # intended sign; the values are kept exactly as they were
        discrete_actions = [
            ('NO-OP', [0., 0.]),
            ('TURN_LEFT', [0., -steer]),
            ('TURN_RIGHT', [0., steer]),
            ('GAS', [gas, 0.]),
            ('BRAKE', [-brake, 0]),
            ('GAS_AND_TURN_LEFT', [gas, -steer]),
            ('GAS_AND_TURN_RIGHT', [gas, steer]),
            ('BRAKE_AND_TURN_LEFT', [brake, -steer]),
            ('BRAKE_AND_TURN_RIGHT', [brake, steer]),
        ]
        self.action_space = PartialDiscreteActionSpaceMap(
            target_actions=[target for _, target in discrete_actions],
            descriptions=[name for name, _ in discrete_actions]
        )

        # map keyboard keys to actions: every description that has an entry in key_map is
        # bound to its action index
        for idx, description in enumerate(self.action_space.descriptions):
            if description in key_map:
                self.key_to_action[key_map[description]] = idx

    self.num_speedup_steps = num_speedup_steps
    self.max_speed = max_speed

    # measurements
    self.autopilot = None
    self.planner = Planner(self.map_name)

    # env initialization
    self.reset_internal_state(True)

    # render: the screen is sized after the first rendered image (width, height)
    if self.is_rendered:
        first_frame = self.get_rendered_image()
        self.renderer.create_screen(first_frame.shape[1], first_frame.shape[0])
def main():
    """
    Command-line entry point: parse the arguments, build the graph manager and start the run.

    With a single worker, ``start_graph`` is called directly in this process. With several
    workers, a parameter-server task and one task per training worker are launched as ray
    remote functions; this function itself does not wait for them to finish.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--preset',
                        help="(string) Name of a preset to run (class name from the 'presets' directory.)",
                        default=None,
                        type=str)
    parser.add_argument('-l', '--list',
                        help="(flag) List all available presets",
                        action='store_true')
    parser.add_argument('-e', '--experiment_name',
                        help="(string) Experiment name to be used to store the results.",
                        default='',
                        type=str)
    parser.add_argument('-r', '--render',
                        help="(flag) Render environment",
                        action='store_true')
    parser.add_argument('-f', '--framework',
                        help="(string) Neural network framework. Available values: tensorflow",
                        default='tensorflow',
                        type=str)
    parser.add_argument('-n', '--num_workers',
                        help="(int) Number of workers for multi-process based agents, e.g. A3C",
                        default=1,
                        type=int)
    parser.add_argument('-c', '--use_cpu',
                        help="(flag) Use only the cpu for training. If a GPU is not available, this flag will have no "
                             "effect and the CPU will be used either way.",
                        action='store_true')
    parser.add_argument('-ew', '--evaluation_worker',
                        help="(int) If multiple workers are used, add an evaluation worker as well which will "
                             "evaluate asynchronously and independently during the training. NOTE: this worker will "
                             "ignore the evaluation settings in the preset's ScheduleParams.",
                        action='store_true')
    parser.add_argument('--play',
                        help="(flag) Play as a human by controlling the game with the keyboard. "
                             "This option will save a replay buffer with the game play.",
                        action='store_true')
    parser.add_argument('--evaluate',
                        help="(flag) Run evaluation only. This is a convenient way to disable "
                             "training in order to evaluate an existing checkpoint.",
                        action='store_true')
    parser.add_argument('-v', '--verbosity',
                        help="(flag) Sets the verbosity level of Coach print outs. Can be either low or high.",
                        default="low",
                        type=str)
    parser.add_argument('-tfv', '--tf_verbosity',
                        help="(flag) TensorFlow verbosity level",
                        default=3,
                        type=int)
    parser.add_argument('-s', '--save_checkpoint_secs',
                        help="(int) Time in seconds between saving checkpoints of the model.",
                        default=None,
                        type=int)
    parser.add_argument('-crd', '--checkpoint_restore_dir',
                        help='(string) Path to a folder containing a checkpoint to restore the model from.',
                        type=str)
    parser.add_argument('-dg', '--dump_gifs',
                        help="(flag) Enable the gif saving functionality.",
                        action='store_true')
    parser.add_argument('-dm', '--dump_mp4',
                        help="(flag) Enable the mp4 saving functionality.",
                        action='store_true')
    parser.add_argument('-at', '--agent_type',
                        help="(string) Choose an agent type class to override on top of the selected preset. "
                             "If no preset is defined, a preset can be set from the command-line by combining settings "
                             "which are set by using --agent_type, --experiment_type, --environemnt_type",
                        default=None,
                        type=str)
    parser.add_argument('-et', '--environment_type',
                        help="(string) Choose an environment type class to override on top of the selected preset."
                             "If no preset is defined, a preset can be set from the command-line by combining settings "
                             "which are set by using --agent_type, --experiment_type, --environemnt_type",
                        default=None,
                        type=str)
    parser.add_argument('-ept', '--exploration_policy_type',
                        help="(string) Choose an exploration policy type class to override on top of the selected "
                             "preset."
                             "If no preset is defined, a preset can be set from the command-line by combining settings "
                             "which are set by using --agent_type, --experiment_type, --environemnt_type",
                        default=None,
                        type=str)
    parser.add_argument('-lvl', '--level',
                        help="(string) Choose the level that will be played in the environment that was selected."
                             "This value will override the level parameter in the environment class.",
                        default=None,
                        type=str)
    parser.add_argument('-cp', '--custom_parameter',
                        help="(string) Semicolon separated parameters used to override specific parameters on top of"
                             " the selected preset (or on top of the command-line assembled one). "
                             "Whenever a parameter value is a string, it should be inputted as '\\\"string\\\"'. "
                             "For ex.: "
                             "\"visualization.render=False; num_training_iterations=500; optimizer='rmsprop'\"",
                        default=None,
                        type=str)
    parser.add_argument('--print_networks_summary',
                        help="(flag) Print network summary to stdout",
                        action='store_true')
    parser.add_argument('-tb', '--tensorboard',
                        help="(flag) When using the TensorFlow backend, enable TensorBoard log dumps. ",
                        action='store_true')
    parser.add_argument('-ns', '--no_summary',
                        help="(flag) Prevent Coach from printing a summary and asking questions at the end of runs",
                        action='store_true')
    parser.add_argument('-d', '--open_dashboard',
                        help="(flag) Open dashboard with the experiment when the run starts",
                        action='store_true')
    parser.add_argument('--seed',
                        help="(int) A seed to use for running the experiment",
                        default=None,
                        type=int)
    # Written as adjacent literals so that no source indentation ends up inside the help text.
    parser.add_argument('--ray_redis_address',
                        help="The address of the Redis server to connect to. If this address is not provided, "
                             "then this command will start Redis, a global scheduler, a local scheduler, "
                             "a plasma store, a plasma manager, and some workers. "
                             "It will also kill these processes when Python exits.",
                        default=None,
                        type=str)
    parser.add_argument('--ray_num_cpus',
                        help="Number of cpus the user wishes all local schedulers to be configured with",
                        default=None,
                        type=int)
    parser.add_argument('--ray_num_gpus',
                        help="Number of gpus the user wishes all local schedulers to be configured with",
                        default=None,
                        type=int)
    # type=bool would turn every non-empty value (including "False") into True, so the value is
    # parsed explicitly. The option still takes a value, exactly as before.
    parser.add_argument('--on_devcloud',
                        help="(bool) Run the workers on the DevCloud nodes found through ray instead of on localhost",
                        default=False,
                        type=_str_to_bool)

    args = parse_arguments(parser)

    graph_manager = get_graph_manager_from_args(args)

    # turn TF debug prints off
    if args.framework == Frameworks.tensorflow:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

    # turn off the summary at the end of the run if necessary
    if not args.no_summary:
        atexit.register(logger.summarize_experiment)
        screen.change_terminal_title(args.experiment_name)

    # open dashboard
    if args.open_dashboard:
        open_dashboard(args.experiment_path)

    # Single-threaded runs
    if args.num_workers == 1:
        # Start the training or evaluation
        task_parameters = TaskParameters(
            framework_type="tensorflow",  # TODO: tensorflow should'nt be hardcoded
            evaluate_only=args.evaluate,
            experiment_path=args.experiment_path,
            seed=args.seed,
            use_cpu=args.use_cpu,
            save_checkpoint_secs=args.save_checkpoint_secs
        )
        # merge the command-line arguments into the task parameters
        task_parameters.__dict__ = add_items_to_dict(task_parameters.__dict__, args.__dict__)

        start_graph(graph_manager=graph_manager, task_parameters=task_parameters)

    # Multi-threaded runs
    else:
        total_tasks = args.num_workers
        if args.evaluation_worker:
            total_tasks += 1

        if args.on_devcloud:
            # the return value was never used: the node addresses are collected through ray below
            create_worker_devcloud(args.num_workers)

        @ray.remote
        def report_node_ip():
            """Return the IP address of the node this task was scheduled on."""
            time.sleep(0.01)
            return ray.services.get_node_ip_address()

        if args.on_devcloud:
            # run many tiny tasks and keep the distinct addresses of the nodes that executed them
            num_probe_tasks = 1000
            ips = set(ray.get([report_node_ip.remote() for _ in range(num_probe_tasks)]))
            home_ip = socket.gethostbyname(socket.gethostname())
            # NOTE(review): worker_ips is computed but worker_hosts below is built from all of
            # ips, including the home node - confirm which of the two is intended
            worker_ips = [ip for ip in ips if ip != home_ip]
            worker_hosts = ",".join("{}:{}".format(ip, get_open_port()) for ip in ips)
        else:
            ray.init()
            worker_hosts = ",".join("localhost:{}".format(get_open_port()) for _ in range(total_tasks))

        ps_hosts = "localhost:{}".format(get_open_port())

        def _run_distributed_task(job_type, task_index, evaluation_worker):
            """Build the parameters of one distributed task and run its graph (blocking)."""
            task_parameters = DistributedTaskParameters(
                framework_type="tensorflow",  # TODO: tensorflow should'nt be hardcoded
                parameters_server_hosts=ps_hosts,
                worker_hosts=worker_hosts,
                job_type=job_type,
                task_index=task_index,
                evaluate_only=evaluation_worker,
                use_cpu=args.use_cpu,
                num_tasks=total_tasks,  # training tasks + 1 evaluation task
                num_training_tasks=args.num_workers,
                experiment_path=args.experiment_path,
                shared_memory_scratchpad=None,
                # each worker gets a different seed
                seed=args.seed + task_index if args.seed is not None else None
            )
            # merge the command-line arguments into the task parameters
            task_parameters.__dict__ = add_items_to_dict(task_parameters.__dict__, args.__dict__)

            # we assume that only the evaluation workers are rendering
            graph_manager.visualization_parameters.render = args.render and evaluation_worker
            start_graph(graph_manager, task_parameters)

        @ray.remote
        def start_distributed_task(job_type, task_index, evaluation_worker=False):
            """Run one distributed task as a ray remote function; returns None."""
            _run_distributed_task(job_type, task_index, evaluation_worker)
            return

        @ray.remote
        def start_distributed_ray_task(job_type, task_index, evaluation_worker=False):
            """Same as start_distributed_task, but returns 1 once the task is done."""
            # NOTE(review): not invoked anywhere in this function - confirm whether it is still needed
            _run_distributed_task(job_type, task_index, evaluation_worker)
            return 1

        # parameter server
        parameter_server = start_distributed_task.remote("ps", 0)

        # training workers
        # wait a bit before spawning the non chief workers in order to make sure the session is
        # already created
        workers = [start_distributed_task.remote("worker", 0)]
        time.sleep(2)
        for task_index in range(1, args.num_workers):
            workers.append(start_distributed_task.remote("worker", task_index))


def _str_to_bool(value):
    """
    Convert a command-line value to a boolean.

    Unlike ``bool``, the common spellings of "false" are recognized, so "False" is parsed as
    False. Any other value keeps the plain truthiness it had before (non-empty means True).

    :param value: the raw command-line value
    :return: the parsed boolean
    """
    text = str(value).strip().lower()
    if text in ('false', 'f', '0', 'no', 'n', 'off'):
        return False
    return bool(text)