def test_constant_schedule():
    schedule = ConstantSchedule(0.3)

    # make sure the values in the constant schedule don't change over time
    for i in range(1000):
        assert schedule.initial_value == 0.3
        assert schedule.current_value == 0.3
        schedule.step()
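For reference, a minimal sketch of a schedule object that would satisfy the test above. This is an illustrative stand-in, not rl_coach's actual ConstantSchedule implementation:

class MinimalConstantSchedule:
    """Illustrative constant schedule: the value never changes, however often step() is called."""
    def __init__(self, value):
        self.initial_value = value
        self.current_value = value

    def step(self):
        # a constant schedule has nothing to update
        pass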
Example #2
 def __init__(self):
     super().__init__(algorithm=NECAlgorithmParameters(),
                      exploration=EGreedyParameters(),
                      memory=NECMemoryParameters(),
                      networks={"main": NECNetworkParameters()})
     self.exploration.epsilon_schedule = ConstantSchedule(0.1)
     self.exploration.evaluation_epsilon = 0.01
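Example #3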
 def __init__(self,
              max_size: Tuple[MemoryGranularity, int],
              alpha: float = 0.6,
              beta: Schedule = ConstantSchedule(0.4),
              epsilon: float = 1e-6,
              allow_duplicates_in_batch_sampling: bool = True):
     """
     :param max_size: the maximum number of transitions or episodes to hold in the memory
     :param alpha: the alpha prioritization coefficient
     :param beta: the beta parameter used for importance sampling
     :param epsilon: a small value added to the priority of each transition
     :param allow_duplicates_in_batch_sampling: allow having the same transition multiple times in a batch
     """
     if max_size[0] != MemoryGranularity.Transitions:
         raise ValueError(
             "Prioritized Experience Replay currently only support setting the memory size in "
             "transitions granularity.")
     self.power_of_2_size = 1
     while self.power_of_2_size < max_size[1]:
         self.power_of_2_size *= 2
     super().__init__((MemoryGranularity.Transitions, self.power_of_2_size),
                      allow_duplicates_in_batch_sampling)
     self.sum_tree = SegmentTree(self.power_of_2_size,
                                 SegmentTree.Operation.SUM)
     self.min_tree = SegmentTree(self.power_of_2_size,
                                 SegmentTree.Operation.MIN)
     self.max_tree = SegmentTree(self.power_of_2_size,
                                 SegmentTree.Operation.MAX)
     self.alpha = alpha
     self.beta = beta
     self.epsilon = epsilon
     self.maximal_priority = 1.0
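As a rough illustration of how these parameters are conventionally used in prioritized experience replay (Schaul et al.), independent of this class's internals: priorities are proportional to (|td_error| + epsilon) ** alpha, and sampled transitions are corrected with importance-sampling weights controlled by beta, so ConstantSchedule(0.4) simply keeps beta fixed at 0.4. A minimal NumPy sketch with illustrative names:

import numpy as np

def per_probs_and_weights(td_errors, beta, alpha=0.6, epsilon=1e-6):
    """Illustrative PER math: sampling probabilities and importance-sampling weights."""
    priorities = (np.abs(td_errors) + epsilon) ** alpha
    probs = priorities / priorities.sum()
    weights = (len(td_errors) * probs) ** (-beta)  # importance-sampling correction
    return probs, weights / weights.max()          # weights are typically normalized by their max

probs, weights = per_probs_and_weights(np.array([0.5, 0.1, 2.0]), beta=0.4)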
Example #4
def coach_adc(model, dataset, arch, data_loader, validate_fn, save_checkpoint_fn):
    task_parameters = TaskParameters(framework_type="tensorflow",
                                     experiment_path="./experiments/test")
    extra_params = {'save_checkpoint_secs': None,
                    'render': True}
    task_parameters.__dict__.update(extra_params)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    if True:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            #'action_range': (0.10, 0.95),
            'action_range': (0.70, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': None,
            'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top5/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * max(1-top1/100, 0.25) * math.log(total_macs)
            #'reward_fn': lambda top1, total_macs: -1 * (1-top1/100) * math.log(total_macs/100000)
            #'reward_fn': lambda top1, total_macs:  top1/100 * total_macs/self.dense_model_macs
        }
    else:
        exploration_noise = 0.5
        #exploration_noise = 0.25
        exploitation_decay = 0.996
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'dataset': dataset,
            'arch': arch,
            'data_loader': data_loader,
            'validate_fn': validate_fn,
            'save_checkpoint_fn': save_checkpoint_fn,
            'action_range': (0.10, 0.95),
            'onehot_encoding': False,
            'normalize_obs': True,
            'desired_reduction': 1.5e8,
            'reward_fn': lambda top1, total_macs: top1/100
            #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75)
        }

    #msglogger.debug('Experiment configuration:\n' + json.dumps(graph_manager.env_params.additional_simulator_parameters, indent=2))
    steps_per_episode = 13
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100*steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300*steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
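As an aside (not part of the distiller code above), the two-phase exploration schedule built here, a constant heatup value followed by exponential decay, can be inspected on its own with the schedule API exercised by the tests on this page. A small sketch, assuming the usual rl_coach import locations:

from rl_coach.core_types import EnvironmentSteps
from rl_coach.schedules import ConstantSchedule, ExponentialSchedule, PieceWiseSchedule

steps_per_episode = 13
schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode))])
for step in range(400 * steps_per_episode):
    if step % (100 * steps_per_episode) == 0:
        print(step, round(schedule.current_value, 4))  # 0.5 during heatup, then decaying towards 0
    schedule.step()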
Example #5
def coach_adc(model, dataset, arch, optimizer_data, validate_fn, save_checkpoint_fn, train_fn):
    # task_parameters = TaskParameters(framework_type="tensorflow",
    #                                  experiment_path="./experiments/test")
    # extra_params = {'save_checkpoint_secs': None,
    #                 'render': True}
    # task_parameters.__dict__.update(extra_params)
    task_parameters = TaskParameters(experiment_path=logger.get_experiment_path('adc'))
    conv_cnt = count_conv_layer(model)

    # Create a dictionary of parameters that Coach will hand over to CNNEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
                'validate_fn': validate_fn,
                'save_checkpoint_fn': save_checkpoint_fn,
                'train_fn': train_fn})

    app_args = distiller.utils.MutableNamedTuple({
                'dataset': dataset,
                'arch': arch,
                'optimizer_data': optimizer_data})
    if True:
        amc_cfg = distiller.utils.MutableNamedTuple({
                #'action_range': (0.20, 0.95),
                'action_range': (0.20, 0.80),
                'onehot_encoding': False,
                'normalize_obs': True,
                'desired_reduction': None,
                'reward_fn': lambda top1, top5, vloss, total_macs: -1 * (1-top1/100) * math.log(total_macs),
                'conv_cnt': conv_cnt,
                'max_reward': -1000})
    else:
        amc_cfg = distiller.utils.MutableNamedTuple({
                'action_range': (0.10, 0.95),
                'onehot_encoding': False,
                'normalize_obs': True,
                'desired_reduction': 1.5e8,
                'reward_fn': lambda top1, top5, vloss, total_macs: top1/100,
                #'reward_fn': lambda top1, total_macs: min(top1/100, 0.75),
                'conv_cnt': conv_cnt,
                'max_reward': -1000})

    # These parameters are passed to the Distiller environment
    graph_manager.env_params.additional_simulator_parameters = {'model': model,
                                                                'app_args': app_args,
                                                                'amc_cfg': amc_cfg,
                                                                'services': services}
    exploration_noise = 0.5
    exploitation_decay = 0.996
    steps_per_episode = conv_cnt
    agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
        (ConstantSchedule(exploration_noise), EnvironmentSteps(100*steps_per_episode)),
        (ExponentialSchedule(exploration_noise, 0, exploitation_decay), EnvironmentSteps(300*steps_per_episode))])
    graph_manager.create_graph(task_parameters)
    graph_manager.improve()
Example #6
 def __init__(self):
     super().__init__()
     self.num_episodes_in_experience_replay = 1000000
     self.policy_gradient_rescaler = PolicyGradientRescaler.GAE
     self.gae_lambda = 0.95
     self.use_kl_regularization = False
     self.clip_likelihood_ratio_using_epsilon = 0.2
     self.estimate_state_value_using_gae = True
     self.beta_entropy = 0.01  # should be 0 for mujoco
     self.num_consecutive_playing_steps = EnvironmentSteps(2048)
     self.optimization_epochs = 10
     self.normalization_stats = None
     self.clipping_decay_schedule = ConstantSchedule(1)
     self.act_for_full_episodes = True
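Example #7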
def test_piece_wise_schedule():
    # decreasing schedule
    schedule = PieceWiseSchedule(
        [(LinearSchedule(1, 3, 10), EnvironmentSteps(5)),
         (ConstantSchedule(4), EnvironmentSteps(10)),
         (ExponentialSchedule(3, 1, 0.99), EnvironmentSteps(10))
         ]
    )

    target_values = np.append(np.linspace(1, 2, 6), np.ones(11)*4)
    for i in range(16):
        assert round(schedule.current_value, 4) == round(target_values[i], 4)
        schedule.step()

    current_power = 1
    for i in range(10):
        assert round(schedule.current_value, 4) == round(3*current_power, 4)
        current_power *= 0.99
        schedule.step()
Example #8
def do_adc_internal(model, args, optimizer_data, validate_fn,
                    save_checkpoint_fn, train_fn):
    dataset = args.dataset
    arch = args.arch
    perform_thinning = True  # args.amc_thinning
    num_ft_epochs = args.amc_ft_epochs
    action_range = args.amc_action_range
    np.random.seed()
    conv_cnt = count_conv_layer(model)

    msglogger.info("Executing AMC: RL agent - %s   RL library - %s",
                   args.amc_agent_algo, RLLIB)

    # Create a dictionary of parameters that Coach will hand over to DistillerWrapperEnvironment
    # once it creates it.
    services = distiller.utils.MutableNamedTuple({
        'validate_fn': validate_fn,
        'save_checkpoint_fn': save_checkpoint_fn,
        'train_fn': train_fn
    })

    app_args = distiller.utils.MutableNamedTuple({
        'dataset': dataset,
        'arch': arch,
        'optimizer_data': optimizer_data
    })

    amc_cfg = distiller.utils.MutableNamedTuple({
        'protocol': args.amc_protocol,
        'agent_algo': args.amc_agent_algo,
        'perform_thinning': perform_thinning,
        'num_ft_epochs': num_ft_epochs,
        'action_range': action_range,
        'conv_cnt': conv_cnt,
        'reward_frequency': args.amc_reward_frequency
    })

    #net_wrapper = NetworkWrapper(model, app_args, services)
    #return sample_networks(net_wrapper, services)

    if args.amc_protocol == "accuracy-guaranteed":
        amc_cfg.target_density = None
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: -(
            1 - top1 / 100) * math.log(total_macs)
        amc_cfg.action_constrain_fn = None
    elif args.amc_protocol == "mac-constrained":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = lambda env, top1, top5, vloss, total_macs: top1 / 100  #(90.5 - top1) / 10
        amc_cfg.action_constrain_fn = DistillerWrapperEnvironment.get_action
    elif args.amc_protocol == "mac-constrained-experimental":
        amc_cfg.target_density = args.amc_target_density
        amc_cfg.reward_fn = experimental_reward_fn
        amc_cfg.action_constrain_fn = None
    else:
        raise ValueError("{} is not supported currently".format(
            args.amc_protocol))

    steps_per_episode = conv_cnt
    if args.amc_agent_algo == "DDPG":
        amc_cfg.heatup_noise = 0.5
        amc_cfg.initial_training_noise = 0.5
        amc_cfg.training_noise_decay = 0.996  # 0.998
        amc_cfg.num_heatup_epochs = args.amc_heatup_epochs
        amc_cfg.num_training_epochs = args.amc_training_epochs
        training_noise_duration = amc_cfg.num_training_epochs * steps_per_episode
        heatup_duration = amc_cfg.num_heatup_epochs * steps_per_episode

    if amc_cfg.agent_algo == "Random-policy":
        return random_agent(
            DistillerWrapperEnvironment(model, app_args, amc_cfg, services))

    if RLLIB == "spinup":
        msglogger.info("AMC: Using spinup")
        env1 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        env2 = DistillerWrapperEnvironment(model, app_args, amc_cfg, services)
        ddpg_spinup(env1, env2)
    else:
        msglogger.info("AMC: Using coach")

        # When we import the graph_manager from the ADC_DDPG preset, we implicitly instruct
        # Coach to create and use our DistillerWrapperEnvironment environment.
        # So Distiller calls Coach, which creates the environment, trains the agent, and ends.
        if args.amc_agent_algo == "DDPG":
            from examples.automated_deep_compression.presets.ADC_DDPG import graph_manager, agent_params
            agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule(
                [(ConstantSchedule(amc_cfg.heatup_noise),
                  EnvironmentSteps(heatup_duration)),
                 (ExponentialSchedule(amc_cfg.initial_training_noise, 0,
                                      amc_cfg.training_noise_decay),
                  EnvironmentSteps(training_noise_duration))])
            # agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0)
        elif "ClippedPPO" in args.amc_agent_algo:
            from examples.automated_deep_compression.presets.ADC_ClippedPPO import graph_manager, agent_params

        # These parameters are passed to the Distiller environment
        graph_manager.env_params.additional_simulator_parameters = {
            'model': model,
            'app_args': app_args,
            'amc_cfg': amc_cfg,
            'services': services
        }

        coach_logs_dir = os.path.join(msglogger.logdir, 'coach')
        os.mkdir(coach_logs_dir)
        task_parameters = TaskParameters(experiment_path=coach_logs_dir)
        graph_manager.create_graph(task_parameters)
        graph_manager.improve()
Example #9
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].middleware_parameters.scheme = [
    Dense([256])
]
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'state': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
agent_params.algorithm.discount = 0.98
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(16)
agent_params.algorithm.num_consecutive_training_steps = 40
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = TrainingSteps(40)
agent_params.algorithm.rate_for_copying_weights_to_target = 0.05
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.2)
agent_params.exploration.evaluation_epsilon = 0

agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Final
agent_params.memory.hindsight_transitions_per_regular_transition = 1
agent_params.memory.goals_space = GoalsSpace(
    goal_name='state',
    reward_type=ReachingGoal(distance_from_goal_threshold=0,
                             goal_reaching_reward=0,
                             default_reward=-1),
    distance_metric=GoalsSpace.DistanceMetric.Euclidean)
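For orientation, a generic sketch of how an epsilon schedule such as the ConstantSchedule(0.2) above is typically consumed during action selection. This is illustrative code, not Coach's EGreedy internals:

import numpy as np

def egreedy_action(q_values, epsilon_schedule, rng=np.random):
    """Pick a random action with probability epsilon, otherwise act greedily, then advance the schedule."""
    if rng.rand() < epsilon_schedule.current_value:
        action = rng.randint(len(q_values))  # exploratory action
    else:
        action = int(np.argmax(q_values))    # greedy action
    epsilon_schedule.step()
    return action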

###############
# Environment #
###############
Example #10
# HER parameters
agent_params.memory = EpisodicHindsightExperienceReplayParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**6)
agent_params.memory.hindsight_goal_selection_method = HindsightGoalSelectionMethod.Future
agent_params.memory.hindsight_transitions_per_regular_transition = 4
agent_params.memory.goals_space = GoalsSpace(goal_name='achieved_goal',
                                             reward_type=ReachingGoal(distance_from_goal_threshold=0.05,
                                                                      goal_reaching_reward=0,
                                                                      default_reward=-1),
                                             distance_metric=GoalsSpace.DistanceMetric.Euclidean)
agent_params.memory.shared_memory = True

# exploration parameters
agent_params.exploration = EGreedyParameters()
agent_params.exploration.epsilon_schedule = ConstantSchedule(0.3)
agent_params.exploration.evaluation_epsilon = 0
# the original authors actually take the noise schedule to be 0.2 * max_abs_range, which is 0.1 * total_range
agent_params.exploration.continuous_exploration_policy_parameters.noise_schedule = ConstantSchedule(0.1)
agent_params.exploration.continuous_exploration_policy_parameters.evaluation_noise = 0

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('observation', 'clipping', ObservationClippingFilter(-200, 200))

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter('observation', 'normalize_observation',
                                                       ObservationNormalizationFilter(name='normalize_observation'))
agent_params.pre_network_filter.add_observation_filter('achieved_goal', 'normalize_achieved_goal',
                                                       ObservationNormalizationFilter(name='normalize_achieved_goal'))
agent_params.pre_network_filter.add_observation_filter('desired_goal', 'normalize_desired_goal',
                                                       ObservationNormalizationFilter(name='normalize_desired_goal'))
Example #11
agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter('CameraRGB', 'cropping',
                                                 ObservationCropFilter(crop_low=np.array([115, 0, 0]),
                                                                       crop_high=np.array([510, -1, -1])))
agent_params.input_filter.add_observation_filter('CameraRGB', 'rescale',
                                                 ObservationRescaleToSizeFilter(
                                                     ImageObservationSpace(np.array([88, 200, 3]), high=255)))
agent_params.input_filter.add_observation_filter('CameraRGB', 'to_uint8', ObservationToUInt8Filter(0, 255))
agent_params.input_filter.add_observation_filter(
    'measurements', 'select_speed',
    ObservationReductionBySubPartsNameFilter(
        ["forward_speed"], reduction_method=ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))

# no exploration is used
agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_percentage_schedule = ConstantSchedule(0)
agent_params.exploration.evaluation_noise_percentage = 0

# no playing during the training phase
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(0)

# use the following command line to download and extract the CARLA dataset:
# python rl_coach/utilities/carla_dataset_to_replay_buffer.py
agent_params.memory.load_memory_from_file_path = "./datasets/carla_train_set_replay_buffer.p"
agent_params.memory.state_key_with_the_class_index = 'high_level_command'
agent_params.memory.num_classes = 4

# download dataset if it doesn't exist
if not os.path.exists(agent_params.memory.load_memory_from_file_path):
    screen.log_title("The CARLA dataset is not present in the following path: {}"
                     .format(agent_params.memory.load_memory_from_file_path))
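Example #12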
schedule_params.evaluation_steps = EnvironmentSteps(135000)
schedule_params.heatup_steps = EnvironmentSteps(50000)

#########
# Agent #
#########
agent_params = DDQNAgentParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0001
agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Empty
agent_params.network_wrappers['main'].heads_parameters = [DuelingQHeadParameters()]
agent_params.network_wrappers['main'].clip_gradients = 10
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(40000)
agent_params.exploration.epsilon_schedule = PieceWiseSchedule(
    [(LinearSchedule(1, 0.1, 1000000), EnvironmentSteps(1000000)),
     (LinearSchedule(0.1, 0.01, 10000000), EnvironmentSteps(1000000)),
     (ConstantSchedule(0.001), EnvironmentSteps(10000000))]
)
agent_params.memory = PrioritizedExperienceReplayParameters()
agent_params.memory.beta = LinearSchedule(0.4, 1, 12500000)  # 12.5M training iterations = 50M steps = 200M frames

###############
# Environment #
###############
env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False

########
Example #13
    'action'].scheme = EmbedderScheme.Empty
agent_params.network_wrappers['actor'].heads_parameters[0].activation_function = 'sigmoid'
#agent_params.network_wrappers['critic'].clip_gradients = 100
#agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.discount = 1
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
agent_params.exploration = TruncatedNormalParameters()  # AdditiveNoiseParameters()
steps_per_episode = 13
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100 * steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300 * steps_per_episode))
])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.input_filter = MujocoInputFilter()
agent_params.output_filter = MujocoOutputFilter()
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#      Gym                   #
##############################
env_params = GymEnvironmentParameters()
env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'
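Example #14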
agent_params = SILAgentParameters()

agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 5
agent_params.algorithm.beta_entropy = 0.01

agent_params.network_wrappers['main'].middleware_parameters = FCMiddlewareParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0007
agent_params.network_wrappers['main'].batch_size = 32
agent_params.network_wrappers['main'].async_training = False
# scaling down the gradients + each agent trains with a batch of 32 = one agent training with a batch of 512
agent_params.network_wrappers['main'].scale_down_gradients_by_number_of_workers_for_sync_training = True

agent_params.memory.shared_memory = True  # according to the SIL code, only a single replay buffer is used
agent_params.memory.max_size = (MemoryGranularity.Transitions, 100000)
agent_params.memory.beta = ConstantSchedule(0.1)  # called bias correction in the paper - this value is only used for
                                                  # hard exploration problems

agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
env_params = Atari()
env_params.level = SingleLevelSelection(atari_deterministic_v4)

vis_params = VisualizationParameters()
vis_params.video_dump_methods = [SelectedPhaseOnlyDumpMethod(RunPhase.TEST), MaxDumpMethod()]
vis_params.dump_mp4 = False
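Example #15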

 def __init__(self):
     super().__init__()
     self.max_size = (MemoryGranularity.Transitions, 1000000)
     self.alpha = 0.6
     self.beta = ConstantSchedule(0.4)
     self.epsilon = 1e-6
Example #16
agent_params.algorithm.policy_gradient_rescaler = PolicyGradientRescaler.GAE
agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.gae_lambda = 0.96
agent_params.algorithm.beta_entropy = 0

agent_params.network_wrappers['main'].clip_gradients = 10.0
agent_params.network_wrappers['main'].learning_rate = 0.00001
# agent_params.network_wrappers['main'].batch_size = 20
agent_params.network_wrappers['main'].input_embedders_parameters = {
    "screen": InputEmbedderParameters(input_rescaling={'image': 3.0})
}

agent_params.exploration = AdditiveNoiseParameters()
agent_params.exploration.noise_schedule = ConstantSchedule(0.05)
# agent_params.exploration.noise_schedule = LinearSchedule(0.4, 0.05, 100000)
agent_params.exploration.evaluation_noise = 0.05

agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

###############
# Environment #
###############

env_params = StarCraft2EnvironmentParameters(level='CollectMineralShards')
env_params.feature_screen_maps_to_use = [5]
env_params.feature_minimap_maps_to_use = [5]