def main():
    from rl_coach.utils import get_open_port

    dashboard_path = os.path.realpath(__file__)
    port = get_open_port()
    command = 'bokeh serve --show {path} --port {port}'.format(
        path=dashboard_path, port=port)

    if args.allow_remote_connection:
        # when allowing a remote connection, selecting an experiment or a file via the GUI buttons does not seem to
        # work well remotely. Instead, we only allow passing an experiment dir from the command line.
        if not args.experiment_dir and not args.experiment_files:
            raise ValueError(
                "The allow_remote_connection flag only works in conjunction with either the experiment_dir"
                " or the experiment_files flag. ")

        # --allow-websocket-origin=* allows connections from remote machines.
        command += ' --allow-websocket-origin=*'

    if args.experiment_dir or args.experiment_files:
        command += ' --args'
        if args.experiment_dir:
            command += ' --experiment_dir {}'.format(args.experiment_dir)
        if args.experiment_files:
            command += ' --experiment_files {}'.format(args.experiment_files)

    os.system(command)
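
All of the examples on this page use rl_coach.utils.get_open_port to pick a free TCP port. As a rough sketch of the usual technique behind such a helper (an assumption, not the library's actual source), it binds a socket to port 0 so the OS assigns an unused port and then returns that port:

import socket

def get_open_port_sketch() -> int:
    # Hypothetical stand-in for rl_coach.utils.get_open_port.
    # Binding to port 0 lets the OS pick a currently unused port; note there is
    # a small race window between closing this socket and the server binding the port.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('', 0))
        return s.getsockname()[1]
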
Example #2
def main():
    from rl_coach.utils import get_open_port

    dashboard_path = os.path.realpath(__file__)
    command = 'bokeh serve --show {} --port {}'.format(dashboard_path,
                                                       get_open_port())
    if args.experiment_dir or args.experiment_files:
        command += ' --args'
        if args.experiment_dir:
            command += ' --experiment_dir {}'.format(args.experiment_dir)
        if args.experiment_files:
            command += ' --experiment_files {}'.format(args.experiment_files)

    os.system(command)
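
Both dashboard examples assemble the bokeh command as a single shell string and run it with os.system. As a hedged alternative sketch (same flags as above, not taken from rl_coach itself), the command can instead be passed to subprocess.run as an argument list, which avoids shell quoting problems when the experiment path contains spaces:

import os
import subprocess

def launch_dashboard_sketch(experiment_dir=None, experiment_files=None):
    from rl_coach.utils import get_open_port

    # hypothetical variant of main() above, with the flags passed as a list
    dashboard_path = os.path.realpath(__file__)
    command = ['bokeh', 'serve', '--show', dashboard_path,
               '--port', str(get_open_port())]
    if experiment_dir or experiment_files:
        command += ['--args']
        if experiment_dir:
            command += ['--experiment_dir', experiment_dir]
        if experiment_files:
            command += ['--experiment_files', experiment_files]
    subprocess.run(command, check=True)
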
Example #3
    def start_multi_threaded(graph_manager: 'GraphManager',
                             args: argparse.Namespace):
        total_tasks = args.num_workers
        if args.evaluation_worker:
            total_tasks += 1

        ps_hosts = "localhost:{}".format(get_open_port())
        worker_hosts = ",".join([
            "localhost:{}".format(get_open_port()) for i in range(total_tasks)
        ])

        # Shared memory
        class CommManager(BaseManager):
            pass

        CommManager.register('SharedMemoryScratchPad',
                             SharedMemoryScratchPad,
                             exposed=['add', 'get', 'internal_call'])
        comm_manager = CommManager()
        comm_manager.start()
        shared_memory_scratchpad = comm_manager.SharedMemoryScratchPad()

        if args.checkpoint_restore_file:
            raise ValueError(
                "Multi-Process runs only support restoring checkpoints from a directory, "
                "and not from a file. ")

        def start_distributed_task(
                job_type,
                task_index,
                evaluation_worker=False,
                shared_memory_scratchpad=shared_memory_scratchpad):
            task_parameters = DistributedTaskParameters(
                framework_type=args.framework,
                parameters_server_hosts=ps_hosts,
                worker_hosts=worker_hosts,
                job_type=job_type,
                task_index=task_index,
                # 0 for the evaluation worker so that it runs indefinitely
                evaluate_only=0 if evaluation_worker else None,
                use_cpu=args.use_cpu,
                num_tasks=total_tasks,  # training tasks + 1 evaluation task
                num_training_tasks=args.num_workers,
                experiment_path=args.experiment_path,
                shared_memory_scratchpad=shared_memory_scratchpad,
                # each worker gets a different seed
                seed=args.seed + task_index if args.seed is not None else None,
                checkpoint_save_secs=args.checkpoint_save_secs,
                # MonitoredTrainingSession only supports restoring from a directory
                checkpoint_restore_path=args.checkpoint_restore_dir,
                checkpoint_save_dir=args.checkpoint_save_dir,
                export_onnx_graph=args.export_onnx_graph,
                apply_stop_condition=args.apply_stop_condition)
            # we assume that only the evaluation workers are rendering
            graph_manager.visualization_parameters.render = args.render and evaluation_worker
            p = Process(target=start_graph,
                        args=(graph_manager, task_parameters))
            # p.daemon = True
            p.start()
            return p

        # parameter server
        parameter_server = start_distributed_task("ps", 0)

        # training workers
        # wait a bit before spawning the non-chief workers to make sure the session has already been created
        workers = []
        workers.append(start_distributed_task("worker", 0))
        time.sleep(2)
        for task_index in range(1, args.num_workers):
            workers.append(start_distributed_task("worker", task_index))

        # evaluation worker
        if args.evaluation_worker or args.render:
            evaluation_worker = start_distributed_task("worker",
                                                       args.num_workers,
                                                       evaluation_worker=True)

        # wait for all workers
        [w.join() for w in workers]
        if args.evaluation_worker:
            evaluation_worker.terminate()
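
The CommManager block above shares one SharedMemoryScratchPad instance between all worker processes through a multiprocessing BaseManager proxy, exposing only its add, get, and internal_call methods. A minimal, self-contained sketch of that pattern is shown below; ScratchPadSketch is a hypothetical key/value stand-in for rl_coach's class, whose real interface may differ:

from multiprocessing import Process
from multiprocessing.managers import BaseManager


class ScratchPadSketch(object):
    # hypothetical stand-in for SharedMemoryScratchPad (assumed key/value semantics)
    def __init__(self):
        self._store = {}

    def add(self, key, value):
        self._store[key] = value

    def get(self, key):
        return self._store.get(key)


class CommManager(BaseManager):
    pass


CommManager.register('ScratchPadSketch', ScratchPadSketch, exposed=['add', 'get'])


def worker(scratchpad):
    # each process talks to the same object through the manager's proxy
    scratchpad.add('worker_done', True)


if __name__ == '__main__':
    manager = CommManager()
    manager.start()
    pad = manager.ScratchPadSketch()
    p = Process(target=worker, args=(pad,))
    p.start()
    p.join()
    print(pad.get('worker_done'))  # True
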
Example #4
    def __init__(self, level: LevelSelection, seed: int, frame_skip: int,
                 human_control: bool, custom_reward_threshold: Union[int,
                                                                     float],
                 visualization_parameters: VisualizationParameters,
                 server_height: int, server_width: int, camera_height: int,
                 camera_width: int, verbose: bool,
                 experiment_suite: ExperimentSuite, config: str,
                 episode_max_time: int, allow_braking: bool,
                 quality: CarlaEnvironmentParameters.Quality,
                 cameras: List[CameraTypes], weather_id: List[int],
                 experiment_path: str,
                 separate_actions_for_throttle_and_brake: bool,
                 num_speedup_steps: int, max_speed: float, **kwargs):
        super().__init__(level, seed, frame_skip, human_control,
                         custom_reward_threshold, visualization_parameters)

        # server configuration
        self.server_height = server_height
        self.server_width = server_width
        self.port = get_open_port()
        self.host = 'localhost'
        self.map_name = CarlaLevel[level.upper()].value['map_name']
        self.map_path = CarlaLevel[level.upper()].value['map_path']
        self.experiment_path = experiment_path

        # client configuration
        self.verbose = verbose
        self.quality = quality
        self.cameras = cameras
        self.weather_id = weather_id
        self.episode_max_time = episode_max_time
        self.allow_braking = allow_braking
        self.separate_actions_for_throttle_and_brake = separate_actions_for_throttle_and_brake
        self.camera_width = camera_width
        self.camera_height = camera_height

        # setup server settings
        self.experiment_suite = experiment_suite
        self.config = config
        if self.config:
            # load settings from file
            with open(self.config, 'r') as fp:
                self.settings = fp.read()
        else:
            # hard coded settings
            self.settings = CarlaSettings()
            self.settings.set(SynchronousMode=True,
                              SendNonPlayerAgentsInfo=False,
                              NumberOfVehicles=15,
                              NumberOfPedestrians=30,
                              WeatherId=random.choice(
                                  force_list(self.weather_id)),
                              QualityLevel=self.quality.value,
                              SeedVehicles=seed,
                              SeedPedestrians=seed)
            if seed is None:
                self.settings.randomize_seeds()

            self.settings = self._add_cameras(self.settings, self.cameras,
                                              self.camera_width,
                                              self.camera_height)

        # open the server
        self.server = self._open_server()

        logging.disable(40)  # suppress log records at ERROR (level 40) and below; only CRITICAL gets through

        # open the client
        self.game = CarlaClient(self.host, self.port, timeout=99999999)
        self.game.connect()
        if self.experiment_suite:
            self.current_experiment_idx = 0
            self.current_experiment = self.experiment_suite.get_experiments()[
                self.current_experiment_idx]
            self.scene = self.game.load_settings(
                self.current_experiment.conditions)
        else:
            self.scene = self.game.load_settings(self.settings)

        # get available start positions
        self.positions = self.scene.player_start_spots
        self.num_positions = len(self.positions)
        self.current_start_position_idx = 0
        self.current_pose = 0

        # state space
        self.state_space = StateSpace({
            "measurements":
            VectorObservationSpace(
                4, measurements_names=["forward_speed", "x", "y", "z"])
        })
        for camera in self.scene.sensors:
            self.state_space[camera.name] = ImageObservationSpace(
                shape=np.array([self.camera_height, self.camera_width, 3]),
                high=255)

        # action space
        if self.separate_actions_for_throttle_and_brake:
            self.action_space = BoxActionSpace(
                shape=3,
                low=np.array([-1, 0, 0]),
                high=np.array([1, 1, 1]),
                descriptions=["steer", "gas", "brake"])
        else:
            self.action_space = BoxActionSpace(
                shape=2,
                low=np.array([-1, -1]),
                high=np.array([1, 1]),
                descriptions=["steer", "gas_and_brake"])

        # human control
        if self.human_control:
            # convert continuous action space to discrete
            self.steering_strength = 0.5
            self.gas_strength = 1.0
            self.brake_strength = 0.5
            # TODO: reverse order of actions
            self.action_space = PartialDiscreteActionSpaceMap(
                target_actions=[[0., 0.], [0., -self.steering_strength],
                                [0., self.steering_strength],
                                [self.gas_strength, 0.],
                                [-self.brake_strength, 0],
                                [self.gas_strength, -self.steering_strength],
                                [self.gas_strength, self.steering_strength],
                                [self.brake_strength, -self.steering_strength],
                                [self.brake_strength, self.steering_strength]],
                descriptions=[
                    'NO-OP', 'TURN_LEFT', 'TURN_RIGHT', 'GAS', 'BRAKE',
                    'GAS_AND_TURN_LEFT', 'GAS_AND_TURN_RIGHT',
                    'BRAKE_AND_TURN_LEFT', 'BRAKE_AND_TURN_RIGHT'
                ])

            # map keyboard keys to actions
            for idx, action in enumerate(self.action_space.descriptions):
                for key in key_map.keys():
                    if action == key:
                        self.key_to_action[key_map[key]] = idx

        self.num_speedup_steps = num_speedup_steps
        self.max_speed = max_speed

        # measurements
        self.autopilot = None
        self.planner = Planner(self.map_name)

        # env initialization
        self.reset_internal_state(True)

        # render
        if self.is_rendered:
            image = self.get_rendered_image()
            self.renderer.create_screen(image.shape[1], image.shape[0])
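
In the non-human-control case the environment exposes either a 3-dim [steer, gas, brake] action space or a 2-dim [steer, gas_and_brake] one. As an illustration of one plausible way to split the combined channel (a hypothetical helper; the environment's own step logic may differ), positive values can be routed to the throttle and negative values to the brake:

import numpy as np


def split_gas_and_brake(action):
    # Hypothetical helper: map a 2-dim [steer, gas_and_brake] action,
    # both components in [-1, 1], to separate steer/throttle/brake commands.
    steer = float(np.clip(action[0], -1.0, 1.0))
    throttle = float(np.clip(action[1], 0.0, 1.0))   # positive part -> gas
    brake = float(np.clip(-action[1], 0.0, 1.0))     # negative part -> brake
    return steer, throttle, brake


# e.g. split_gas_and_brake([0.2, -0.6]) -> (0.2, 0.0, 0.6)
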
Example #5
def main():

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--preset',
        help=
        "(string) Name of a preset to run (class name from the 'presets' directory.)",
        default=None,
        type=str)
    parser.add_argument('-l',
                        '--list',
                        help="(flag) List all available presets",
                        action='store_true')
    parser.add_argument(
        '-e',
        '--experiment_name',
        help="(string) Experiment name to be used to store the results.",
        default='',
        type=str)
    parser.add_argument('-r',
                        '--render',
                        help="(flag) Render environment",
                        action='store_true')
    parser.add_argument(
        '-f',
        '--framework',
        help="(string) Neural network framework. Available values: tensorflow",
        default='tensorflow',
        type=str)
    parser.add_argument(
        '-n',
        '--num_workers',
        help="(int) Number of workers for multi-process based agents, e.g. A3C",
        default=1,
        type=int)
    parser.add_argument(
        '-c',
        '--use_cpu',
        help=
        "(flag) Use only the cpu for training. If a GPU is not available, this flag will have no "
        "effect and the CPU will be used either way.",
        action='store_true')
    parser.add_argument(
        '-ew',
        '--evaluation_worker',
        help=
        "(int) If multiple workers are used, add an evaluation worker as well which will "
        "evaluate asynchronously and independently during the training. NOTE: this worker will "
        "ignore the evaluation settings in the preset's ScheduleParams.",
        action='store_true')
    parser.add_argument(
        '--play',
        help="(flag) Play as a human by controlling the game with the keyboard. "
        "This option will save a replay buffer with the game play.",
        action='store_true')
    parser.add_argument(
        '--evaluate',
        help="(flag) Run evaluation only. This is a convenient way to disable "
        "training in order to evaluate an existing checkpoint.",
        action='store_true')
    parser.add_argument(
        '-v',
        '--verbosity',
        help=
        "(flag) Sets the verbosity level of Coach print outs. Can be either low or high.",
        default="low",
        type=str)
    parser.add_argument('-tfv',
                        '--tf_verbosity',
                        help="(flag) TensorFlow verbosity level",
                        default=3,
                        type=int)
    parser.add_argument(
        '-s',
        '--save_checkpoint_secs',
        help="(int) Time in seconds between saving checkpoints of the model.",
        default=None,
        type=int)
    parser.add_argument(
        '-crd',
        '--checkpoint_restore_dir',
        help=
        '(string) Path to a folder containing a checkpoint to restore the model from.',
        type=str)
    parser.add_argument('-dg',
                        '--dump_gifs',
                        help="(flag) Enable the gif saving functionality.",
                        action='store_true')
    parser.add_argument('-dm',
                        '--dump_mp4',
                        help="(flag) Enable the mp4 saving functionality.",
                        action='store_true')
    parser.add_argument(
        '-at',
        '--agent_type',
        help=
        "(string) Choose an agent type class to override on top of the selected preset. "
        "If no preset is defined, a preset can be set from the command-line by combining settings "
        "which are set by using --agent_type, --experiment_type, --environemnt_type",
        default=None,
        type=str)
    parser.add_argument(
        '-et',
        '--environment_type',
        help=
        "(string) Choose an environment type class to override on top of the selected preset."
        "If no preset is defined, a preset can be set from the command-line by combining settings "
        "which are set by using --agent_type, --experiment_type, --environemnt_type",
        default=None,
        type=str)
    parser.add_argument(
        '-ept',
        '--exploration_policy_type',
        help=
        "(string) Choose an exploration policy type class to override on top of the selected "
        "preset."
        "If no preset is defined, a preset can be set from the command-line by combining settings "
        "which are set by using --agent_type, --experiment_type, --environemnt_type",
        default=None,
        type=str)
    parser.add_argument(
        '-lvl',
        '--level',
        help=
        "(string) Choose the level that will be played in the environment that was selected."
        "This value will override the level parameter in the environment class.",
        default=None,
        type=str)
    parser.add_argument(
        '-cp',
        '--custom_parameter',
        help=
        "(string) Semicolon separated parameters used to override specific parameters on top of"
        " the selected preset (or on top of the command-line assembled one). "
        "Whenever a parameter value is a string, it should be inputted as '\\\"string\\\"'. "
        "For ex.: "
        "\"visualization.render=False; num_training_iterations=500; optimizer='rmsprop'\"",
        default=None,
        type=str)
    parser.add_argument('--print_networks_summary',
                        help="(flag) Print network summary to stdout",
                        action='store_true')
    parser.add_argument(
        '-tb',
        '--tensorboard',
        help=
        "(flag) When using the TensorFlow backend, enable TensorBoard log dumps. ",
        action='store_true')
    parser.add_argument(
        '-ns',
        '--no_summary',
        help=
        "(flag) Prevent Coach from printing a summary and asking questions at the end of runs",
        action='store_true')
    parser.add_argument(
        '-d',
        '--open_dashboard',
        help="(flag) Open dashboard with the experiment when the run starts",
        action='store_true')
    parser.add_argument('--seed',
                        help="(int) A seed to use for running the experiment",
                        default=None,
                        type=int)

    parser.add_argument(
        '--ray_redis_address',
        help=
        "The address of the Redis server to connect to. If this address is not provided,\
                         then this command will start Redis, a global scheduler, a local scheduler, \
                         a plasma store, a plasma manager, and some workers. \
                         It will also kill these processes when Python exits.",
        default=None,
        type=str)

    parser.add_argument(
        '--ray_num_cpus',
        help=
        "Number of cpus the user wishes all local schedulers to be configured with",
        default=None,
        type=int)

    parser.add_argument(
        '--ray_num_gpus',
        help=
        "Number of gpus the user wishes all local schedulers to be configured with",
        default=None,
        type=int)

    parser.add_argument(
        '--on_devcloud',
        help=
        "Run the workers on Intel DevCloud instead of spawning them locally",
        default=False,
        type=bool)

    args = parse_arguments(parser)

    graph_manager = get_graph_manager_from_args(args)

    # Intel optimized TF seems to run significantly faster when limiting to a single OMP thread.
    # This will not affect GPU runs.
    # os.environ["OMP_NUM_THREADS"] = "1"

    # turn TF debug prints off
    if args.framework == Frameworks.tensorflow:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_verbosity)

    # register the end-of-run experiment summary unless it was explicitly disabled
    if not args.no_summary:
        atexit.register(logger.summarize_experiment)
        screen.change_terminal_title(args.experiment_name)

    # open dashboard
    if args.open_dashboard:
        open_dashboard(args.experiment_path)

    # Single-threaded runs
    if args.num_workers == 1:
        # Start the training or evaluation
        task_parameters = TaskParameters(
            framework_type="tensorflow",  # TODO: tensorflow shouldn't be hardcoded
            evaluate_only=args.evaluate,
            experiment_path=args.experiment_path,
            seed=args.seed,
            use_cpu=args.use_cpu,
            save_checkpoint_secs=args.save_checkpoint_secs)
        task_parameters.__dict__ = add_items_to_dict(task_parameters.__dict__,
                                                     args.__dict__)

        start_graph(graph_manager=graph_manager,
                    task_parameters=task_parameters)
        #start_graph_ray.remote(graph_manager,task_parameters)

    # Multi-threaded runs
    else:
        #ray.init(redis_address=args.ray_redis_address,
        #    num_cpus=args.ray_num_cpus,
        #    num_gpus=args.ray_num_gpus)

        total_tasks = args.num_workers
        if args.evaluation_worker:
            total_tasks += 1

        if args.on_devcloud:
            ips = create_worker_devcloud(args.num_workers)

            @ray.remote
            def f():
                time.sleep(0.01)
                #os.system('/usr/local/bin/qstat')
                return ray.services.get_node_ip_address()

            # already inside the devcloud branch; collect the node IPs reported by the ray tasks
            ips = set(ray.get([f.remote() for _ in range(1000)]))

            home_ip = socket.gethostbyname(socket.gethostname())

            worker_ips = [z for z in ips if z != home_ip]
            worker_hosts = ",".join(
                ["{}:{}".format(n, get_open_port()) for n in ips])

        else:
            ray.init()
            worker_hosts = ",".join([
                "localhost:{}".format(get_open_port())
                for i in range(total_tasks)
            ])

        ps_hosts = "localhost:{}".format(get_open_port())

        @ray.remote
        def start_distributed_task(job_type,
                                   task_index,
                                   evaluation_worker=False):

            task_parameters = DistributedTaskParameters(
                framework_type="tensorflow",  # TODO: tensorflow shouldn't be hardcoded
                parameters_server_hosts=ps_hosts,
                worker_hosts=worker_hosts,
                job_type=job_type,
                task_index=task_index,
                evaluate_only=evaluation_worker,
                use_cpu=args.use_cpu,
                num_tasks=total_tasks,  # training tasks + 1 evaluation task
                num_training_tasks=args.num_workers,
                experiment_path=args.experiment_path,
                shared_memory_scratchpad=None,
                # each worker gets a different seed
                seed=args.seed + task_index if args.seed is not None else None)
            task_parameters.__dict__ = add_items_to_dict(
                task_parameters.__dict__, args.__dict__)
            # we assume that only the evaluation workers are rendering

            graph_manager.visualization_parameters.render = args.render and evaluation_worker
            start_graph(graph_manager, task_parameters)
            #p = Process(target=start_graph, args=(graph_manager, task_parameters))
            #p.start()
            return

        @ray.remote
        def start_distributed_ray_task(job_type,
                                       task_index,
                                       evaluation_worker=False):
            task_parameters = DistributedTaskParameters(
                framework_type="tensorflow",  # TODO: tensorflow shouldn't be hardcoded
                parameters_server_hosts=ps_hosts,
                worker_hosts=worker_hosts,
                job_type=job_type,
                task_index=task_index,
                evaluate_only=evaluation_worker,
                use_cpu=args.use_cpu,
                num_tasks=total_tasks,  # training tasks + 1 evaluation task
                num_training_tasks=args.num_workers,
                experiment_path=args.experiment_path,
                shared_memory_scratchpad=None,
                # each worker gets a different seed
                seed=args.seed + task_index if args.seed is not None else None)
            task_parameters.__dict__ = add_items_to_dict(
                task_parameters.__dict__, args.__dict__)
            # we assume that only the evaluation workers are rendering
            graph_manager.visualization_parameters.render = args.render and evaluation_worker
            start_graph(graph_manager, task_parameters)
            return 1

        # parameter server
        parameter_server = start_distributed_task.remote("ps", 0)

        # training workers
        # wait a bit before spawning the non-chief workers to make sure the session has already been created
        workers = []
        workers.append(start_distributed_task.remote("worker", 0))
        time.sleep(2)

        for task_index in range(1, args.num_workers):
            workers.append(start_distributed_task.remote("worker", task_index))