Example #1
    def learn_from_batch(self, batch):
        if not self.main_network.online_network.output_heads[
                0].DND.has_enough_entries(self.tp.agent.number_of_knn):
            return 0
        else:
            if not self.training_started:
                self.training_started = True
                screen.log_title(
                    "Finished collecting initial entries in DND. Starting to train network..."
                )

        current_states, next_states, actions, rewards, game_overs, total_return = self.extract_batch(
            batch)

        TD_targets = self.main_network.online_network.predict(current_states)

        # only update the action that was actually taken in this transition
        for i in range(self.tp.batch_size):
            TD_targets[i, actions[i]] = total_return[i]

        # train the neural network
        result = self.main_network.train_and_sync_networks(
            current_states, TD_targets)

        total_loss = result[0]

        return total_loss
Example #2
    def evaluate(self, num_episodes, keep_networks_synced=False):
        """
        Run in an evaluation mode for several episodes. Actions will be chosen greedily.
        :param keep_networks_synced: keep the online network in sync with the global network after every episode
        :param num_episodes: The number of episodes to evaluate on
        :return: None
        """

        max_reward_achieved = -float('inf')
        average_evaluation_reward = 0
        screen.log_title("Running evaluation")
        self.env.change_phase(RunPhase.TEST)
        for i in range(num_episodes):
            # keep the online network in sync with the global network
            if keep_networks_synced:
                for network in self.networks:
                    network.sync()

            episode_ended = False
            while not episode_ended:
                episode_ended = self.act(phase=RunPhase.TEST)

            if self.tp.visualization.dump_gifs and self.total_reward_in_current_episode > max_reward_achieved:
                max_reward_achieved = self.total_reward_in_current_episode
                frame_skipping = int(5/self.tp.env.frame_skip)
                logger.create_gif(self.last_episode_images[::frame_skipping],
                                  name='score-{}'.format(max_reward_achieved), fps=10)

            average_evaluation_reward += self.total_reward_in_current_episode
            self.reset_game()

        average_evaluation_reward /= float(num_episodes)

        self.env.change_phase(RunPhase.TRAIN)
        screen.log_title("Evaluation done. Average reward = {}.".format(average_evaluation_reward))
Example #3
    def improve(self):
        """
        Training algorithms wrapper. Heatup >> [ Evaluate >> Play >> Train >> Save checkpoint ]

        :return: None
        """

        # synchronize the online network weights with the global network
        for network in self.networks:
            network.sync()

        # heatup phase
        if self.tp.num_heatup_steps != 0:
            self.in_heatup = True
            screen.log_title("Starting heatup {}".format(self.task_id))
            num_steps_required_for_one_training_batch = self.tp.batch_size * self.tp.env.observation_stack_size
            for step in range(max(self.tp.num_heatup_steps, num_steps_required_for_one_training_batch)):
                self.act()

        # training phase
        self.in_heatup = False
        screen.log_title("Starting training {}".format(self.task_id))
        self.exploration_policy.change_phase(RunPhase.TRAIN)
        training_start_time = time.time()
        model_snapshots_periods_passed = -1

        while self.training_iteration < self.tp.num_training_iterations:
            # evaluate
            evaluate_agent = (self.last_episode_evaluation_ran != self.current_episode) and \
                             (self.current_episode % self.tp.evaluate_every_x_episodes == 0)
            if evaluate_agent:
                self.last_episode_evaluation_ran = self.current_episode
                self.evaluate(self.tp.evaluation_episodes)

            # snapshot model
            if self.tp.save_model_sec and self.tp.save_model_sec > 0 and not self.tp.distributed:
                total_training_time = time.time() - training_start_time
                current_snapshot_period = (int(total_training_time) // self.tp.save_model_sec)
                if current_snapshot_period > model_snapshots_periods_passed:
                    model_snapshots_periods_passed = current_snapshot_period
                    self.main_network.save_model(model_snapshots_periods_passed)

            # play and record in replay buffer
            if self.tp.agent.step_until_collecting_full_episodes:
                step = 0
                while step < self.tp.agent.num_consecutive_playing_steps or self.memory.get_episode(-1).length() != 0:
                    self.act()
                    step += 1
            else:
                for step in range(self.tp.agent.num_consecutive_playing_steps):
                    self.act()

            # train
            if self.tp.train:
                for step in range(self.tp.agent.num_consecutive_training_steps):
                    loss = self.train()
                    self.loss.add_sample(loss)
                    self.training_iteration += 1
                self.post_training_commands()
Example #4
def set_framework(framework_type):
    # choosing neural network framework
    framework = Frameworks().get(framework_type)
    sess = None
    if framework == Frameworks.TensorFlow:
        import tensorflow as tf
        config = tf.ConfigProto()
        config.allow_soft_placement = True
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.2
        sess = tf.Session(config=config)
    elif framework == Frameworks.Neon:
        import ngraph as ng
        sess = ng.transformers.make_transformer()
    screen.log_title("Using {} framework".format(Frameworks().to_string(framework)))
    return sess
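
A minimal usage sketch (hypothetical, not part of the example above): it assumes the framework name is the string parsed from the command line, as in the Frameworks().get(args.framework) call shown in Example #6, and that the returned session is stored on the tuning parameters, as the tuning_parameters.sess attribute in Example #9 suggests.

# hypothetical call site for set_framework
sess = set_framework(args.framework)
tuning_parameters.sess = sess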
Example #5
    def learn_from_batch(self, batch):
        if not self.main_network.online_network.output_heads[
                0].DND.has_enough_entries(self.tp.agent.number_of_knn):
            return 0
        else:
            if not self.training_started:
                self.training_started = True
                screen.log_title(
                    "Finished collecting initial entries in DND. Starting to train network..."
                )

        current_states, next_states, actions, rewards, game_overs, total_return = self.extract_batch(
            batch)
        result = self.main_network.train_and_sync_networks(
            current_states, total_return)
        total_loss = result[0]

        return total_loss
Example #6
def check_input_and_fill_run_dict(parser):
    args = parser.parse_args()

    # if no arg is given
    if len(sys.argv) == 1:
        parser.print_help()
        exit(0)

    # list available presets
    if args.list:
        presets_lists = list_all_classes_in_module(presets)
        screen.log_title("Available Presets:")
        for preset in presets_lists:
            print(preset)
        sys.exit(0)

    # check inputs
    try:
        # re.match returns None for non-numeric input, so catch AttributeError as well
        num_workers = int(re.match(r"^\d+$", args.num_workers).group(0))
    except (ValueError, AttributeError):
        screen.error("Parameter num_workers should be an integer.")

    preset_names = list_all_classes_in_module(presets)
    if args.preset is not None and args.preset not in preset_names:
        screen.error("A non-existing preset was selected. ")

    if args.checkpoint_restore_dir is not None and not os.path.exists(args.checkpoint_restore_dir):
        screen.error("The requested checkpoint folder to load from does not exist. ")

    if args.save_model_sec is not None:
        try:
            args.save_model_sec = int(args.save_model_sec)
        except ValueError:
            screen.error("Parameter save_model_sec should be an integer.")

    if args.preset is None and (args.agent_type is None or args.environment_type is None
                                       or args.exploration_policy_type is None) and not args.play:
        screen.error('When no preset is given for Coach to run, the user is expected to input the desired agent_type,'
                     ' environment_type and exploration_policy_type to assemble a preset. '
                     '\nAt least one of these parameters was not given.')
    elif args.preset is None and args.play and args.environment_type is None:
        screen.error('When no preset is given for Coach to run, and the user requests human control over the environment,'
                     ' the user is expected to input the desired environment_type and level.'
                     '\nAt least one of these parameters was not given.')
    elif args.preset is None and args.play and args.environment_type:
        args.agent_type = 'Human'
        args.exploration_policy_type = 'ExplorationParameters'

    # get experiment name and path
    experiment_name = logger.get_experiment_name(args.experiment_name)
    experiment_path = logger.get_experiment_path(experiment_name)

    if args.play and num_workers > 1:
        screen.warning("Playing the game as a human is only available with a single worker. "
                       "The number of workers will be reduced to 1")
        num_workers = 1

    # fill run_dict
    run_dict = dict()
    run_dict['agent_type'] = args.agent_type
    run_dict['environment_type'] = args.environment_type
    run_dict['exploration_policy_type'] = args.exploration_policy_type
    run_dict['level'] = args.level
    run_dict['preset'] = args.preset
    run_dict['custom_parameter'] = args.custom_parameter
    run_dict['experiment_path'] = experiment_path
    run_dict['framework'] = Frameworks().get(args.framework)
    run_dict['play'] = args.play
    run_dict['evaluate'] = args.evaluate  # or args.play

    # multi-threading parameters
    run_dict['num_threads'] = num_workers

    # checkpoints
    run_dict['save_model_sec'] = args.save_model_sec
    run_dict['save_model_dir'] = experiment_path if args.save_model_sec is not None else None
    run_dict['checkpoint_restore_dir'] = args.checkpoint_restore_dir

    # visualization
    run_dict['visualization.dump_gifs'] = args.dump_gifs
    run_dict['visualization.render'] = args.render
    run_dict['visualization.tensorboard'] = args.tensorboard

    return args, run_dict
Example #7
        worker_hosts = ",".join(["localhost:{}".format(get_open_port()) for i in range(run_dict['num_threads'] + 1)])

        # Make sure to disable GPU so that all the workers will use the CPU
        set_cpu()

        # create a parameter server
        cmd = [
            "python3",
            "./parallel_actor.py",
            "--ps_hosts={}".format(ps_hosts),
            "--worker_hosts={}".format(worker_hosts),
            "--job_name=ps",
        ]
        parameter_server = Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=1)

        screen.log_title("*** Distributed Training ***")
        time.sleep(1)

        # create N training workers and 1 evaluating worker
        workers = []

        for i in range(run_dict['num_threads'] + 1):
            run_dict['task_id'] = i
            # the last worker is the evaluation worker
            if i == run_dict['num_threads']:
                run_dict['evaluate_only'] = True
                run_dict['visualization.render'] = args.render
            else:
                run_dict['evaluate_only'] = False
                run_dict['visualization.render'] = False  # in a parallel setting, only the evaluation agent renders
Example #8
agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0)
agent_params.exploration.evaluation_noise_percentage = 0

# no playing during the training phase
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0)

# use the following command line to download and extract the CARLA dataset:
# python rl_coach/utilities/carla_dataset_to_replay_buffer.py
agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p"
agent_params.memory.state_key_with_the_class_index = 'high_level_command'
agent_params.memory.num_classes = 4

# download dataset if it doesn't exist
if not os.path.exists(agent_params.memory.load_memory_from_file_path):
    screen.log_title("The CARLA dataset is not present in the following path: {}"
                     .format(agent_params.memory.load_memory_from_file_path))
    result = screen.ask_yes_no("Do you want to download it now?")
    if result:
        create_dataset(None, "./datasets/carla_train_set_replay_buffer.p")
    else:
        screen.error("Please update the path to the CARLA dataset in the CARLA_CIL preset", crash=True)


###############
# Environment #
###############
env_params = CarlaEnvironmentParameters()
env_params.level = 'town1'
env_params.cameras = ['CameraRGB']
env_params.camera_height = 600
env_params.camera_width = 800
Example #9
    def __init__(self,
                 env,
                 tuning_parameters,
                 replicated_device=None,
                 task_id=0):
        """
        :param env: An environment instance
        :type env: EnvironmentWrapper
        :param tuning_parameters: A Preset class instance with all the running parameters
        :type tuning_parameters: Preset
        :param replicated_device: A tensorflow device for distributed training (optional)
        :type replicated_device: instancemethod
        :param task_id: The current task id
        :type task_id: int
        """

        screen.log_title("Creating agent {}".format(task_id))
        self.task_id = task_id
        self.sess = tuning_parameters.sess
        self.env = tuning_parameters.env_instance = env
        self.imitation = False

        # i/o dimensions
        if not tuning_parameters.env.desired_observation_width or not tuning_parameters.env.desired_observation_height:
            tuning_parameters.env.desired_observation_width = self.env.width
            tuning_parameters.env.desired_observation_height = self.env.height
        self.action_space_size = tuning_parameters.env.action_space_size = self.env.action_space_size
        self.measurements_size = tuning_parameters.env.measurements_size = self.env.measurements_size
        if tuning_parameters.agent.use_accumulated_reward_as_measurement:
            self.measurements_size = tuning_parameters.env.measurements_size = (
                self.measurements_size[0] + 1, )

        # modules
        if tuning_parameters.agent.load_memory_from_file_path:
            screen.log_title(
                "Loading replay buffer from pickle. Pickle path: {}".format(
                    tuning_parameters.agent.load_memory_from_file_path))
            self.memory = read_pickle(
                tuning_parameters.agent.load_memory_from_file_path)
        else:
            self.memory = eval(tuning_parameters.memory +
                               '(tuning_parameters)')
        # self.architecture = eval(tuning_parameters.architecture)

        self.has_global = replicated_device is not None
        self.replicated_device = replicated_device
        self.worker_device = "/job:worker/task:{}/cpu:0".format(
            task_id) if replicated_device is not None else "/gpu:0"

        self.exploration_policy = eval(tuning_parameters.exploration.policy +
                                       '(tuning_parameters)')
        self.evaluation_exploration_policy = eval(
            tuning_parameters.exploration.evaluation_policy +
            '(tuning_parameters)')
        self.evaluation_exploration_policy.change_phase(RunPhase.TEST)

        # initialize all internal variables
        self.tp = tuning_parameters
        self.in_heatup = False
        self.total_reward_in_current_episode = 0
        self.total_steps_counter = 0
        self.running_reward = None
        self.training_iteration = 0
        self.current_episode = self.tp.current_episode = 0
        self.curr_state = {}
        self.current_episode_steps_counter = 0
        self.episode_running_info = {}
        self.last_episode_evaluation_ran = 0
        self.running_observations = []
        logger.set_current_time(self.current_episode)
        self.main_network = None
        self.networks = []
        self.last_episode_images = []
        self.renderer = Renderer()

        # signals
        self.signals = []
        self.loss = Signal('Loss')
        self.signals.append(self.loss)
        self.curr_learning_rate = Signal('Learning Rate')
        self.signals.append(self.curr_learning_rate)

        if self.tp.env.normalize_observation and not self.env.is_state_type_image:
            if not self.tp.distributed or not self.tp.agent.share_statistics_between_workers:
                self.running_observation_stats = RunningStat(
                    (self.tp.env.desired_observation_width, ))
                self.running_reward_stats = RunningStat(())
            else:
                self.running_observation_stats = SharedRunningStats(
                    self.tp,
                    replicated_device,
                    shape=(self.tp.env.desired_observation_width, ),
                    name='observation_stats')
                self.running_reward_stats = SharedRunningStats(
                    self.tp, replicated_device, shape=(), name='reward_stats')

        # the env is already reset at this point. Resetting it again would fail,
        # since an env that is not done cannot be reset
        self.reset_game(do_not_reset_env=True)

        # use seed
        if self.tp.seed is not None:
            random.seed(self.tp.seed)
            np.random.seed(self.tp.seed)
Example #10
def check_input_and_fill_run_dict(parser):
    args = parser.parse_args()

    # if no arg is given
    if len(sys.argv) == 1:
        parser.print_help()
        exit(0)

    # list available presets
    if args.list:
        presets_lists = list_all_classes_in_module(presets)
        screen.log_title("Available Presets:")
        for preset in presets_lists:
            print(preset)
        sys.exit(0)

    # check inputs
    try:
        # re.match returns None for non-numeric input, so catch AttributeError as well
        num_workers = int(re.match(r"^\d+$", args.num_workers).group(0))
    except (ValueError, AttributeError):
        screen.error("Parameter num_workers should be an integer.")
        exit(1)

    preset_names = list_all_classes_in_module(presets)
    if args.preset is not None and args.preset not in preset_names:
        screen.error("A non-existing preset was selected. ")
        exit(1)

    if args.checkpoint_restore_dir is not None and not os.path.exists(
            args.checkpoint_restore_dir):
        screen.error(
            "The requested checkpoint folder to load from does not exist. ")
        exit(1)

    if args.save_model_sec is not None:
        try:
            args.save_model_sec = int(args.save_model_sec)
        except ValueError:
            screen.error("Parameter save_model_sec should be an integer.")
            exit(1)

    if args.preset is None and (args.agent_type is None
                                or args.environment_type is None
                                or args.exploration_policy_type is None):
        screen.error(
            'When no preset is given for Coach to run, the user is expected to input the desired agent_type,'
            ' environment_type and exploration_policy_type to assemble a preset. '
            '\nAt least one of these parameters was not given.')
        exit(1)

    experiment_name = args.experiment_name

    if args.experiment_name == '':
        experiment_name = screen.ask_input("Please enter an experiment name: ")

    experiment_name = experiment_name.replace(" ", "_")
    match = re.match(r"^$|^\w{1,100}$", experiment_name)

    if match is None:
        screen.error(
            'Experiment name must be composed only of alphanumeric characters and underscores and should not be '
            'longer than 100 characters.')
        exit(1)
    experiment_path = os.path.join('./experiments/', match.group(0))
    experiment_path = get_experiment_path(experiment_path)

    # fill run_dict
    run_dict = dict()
    run_dict['agent_type'] = args.agent_type
    run_dict['environment_type'] = args.environment_type
    run_dict['exploration_policy_type'] = args.exploration_policy_type
    run_dict['preset'] = args.preset
    run_dict['custom_parameter'] = args.custom_parameter
    run_dict['experiment_path'] = experiment_path
    run_dict['framework'] = Frameworks().get(args.framework)

    # multi-threading parameters
    run_dict['num_threads'] = num_workers

    # checkpoints
    run_dict['save_model_sec'] = args.save_model_sec
    run_dict[
        'save_model_dir'] = experiment_path if args.save_model_sec is not None else None
    run_dict['checkpoint_restore_dir'] = args.checkpoint_restore_dir

    # visualization
    run_dict['visualization.dump_gifs'] = args.dump_gifs
    run_dict['visualization.render'] = args.render

    return args, run_dict
Example #11
        preset = eval('presets.{}()'.format(preset_name))
        if preset.test and preset_name not in presets_to_ignore:
            frameworks = []
            if preset.agent.tensorflow_support and not args.ignore_tensorflow:
                frameworks.append('tensorflow')
            if preset.agent.neon_support and not args.ignore_neon:
                frameworks.append('neon')

            for framework in frameworks:
                if args.stop_after_first_failure and fail_count > 0:
                    break

                test_count += 1

                # run the experiment in a separate thread
                screen.log_title("Running test {} - {}".format(
                    preset_name, framework))
                log_file_name = 'test_log_{preset_name}_{framework}.txt'.format(
                    preset_name=preset_name,
                    framework=framework,
                )
                cmd = ('CUDA_VISIBLE_DEVICES='
                       ' python3 coach.py '
                       '-p {preset_name} '
                       '-f {framework} '
                       '-e {test_name} '
                       '-n {num_workers} '
                       '-cp "seed=0" '
                       '&> {log_file_name} ').format(
                           preset_name=preset_name,
                           framework=framework,
                           test_name=test_name,