Пример #1
0
    def _setup(self, variant):
        """Store the experiment variant and initialise session bookkeeping.

        Seeds the RNGs from ``variant['run_params']['seed']``, keeps a
        reference to the variant, and adopts whatever session Keras already
        holds as the default.
        """
        seed = variant['run_params']['seed']
        set_seed(seed)

        self._variant = variant
        # Reuse the session Keras currently exposes rather than creating one.
        self._session = tf.keras.backend.get_session()

        # Nothing has been built yet; these are populated later.
        self._built = False
        self.train_generator = None
Пример #2
0
    def _setup(self, variant):
        """Seed RNGs, create a growth-enabled TF session, and register it.

        The session is installed as the Keras default before being cached on
        the instance, so downstream Keras calls and this object share it.
        """
        set_seed(variant['run_params']['seed'])
        self._variant = variant

        # allow_growth: allocate GPU memory on demand instead of reserving
        # the whole device up front.
        config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        tf.keras.backend.set_session(tf.Session(config=config))
        self._session = tf.keras.backend.get_session()

        # Populated later by the build/training setup.
        self._built = False
        self.train_generator = None
Пример #3
0
    def _setup(self, variant):
        """Seed RNGs, build per-algorithm save/restore tables, and set up TF.

        Dispatch tables map an algorithm name to its checkpoint handlers;
        the 'default' entry falls back to the MBPO handlers.
        """
        set_seed(variant['run_params']['seed'])
        self._variant = variant

        # Checkpoint-save dispatch, keyed by algorithm name.
        self.SAVE_PER_ALGO = {
            'default': self.save_mbpo,
            'MBPO': self.save_mbpo,
            'CMBPO': self.save_cmbpo,
        }

        # Checkpoint-restore dispatch, mirroring SAVE_PER_ALGO.
        self.RESTORE_PER_ALGO = {
            'default': self.restore_mbpo,
            'MBPO': self.restore_mbpo,
            'CMBPO': self.restore_cmbpo,
        }

        # allow_growth: claim GPU memory incrementally rather than all at once.
        config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        tf.keras.backend.set_session(tf.Session(config=config))
        self._session = tf.keras.backend.get_session()

        # Populated later by the build/training setup.
        self._built = False
        self.train_generator = None
Пример #4
0
def main():
    """Entry point: parse CLI args, assemble the experiment variant, build all
    training components (envs, pool, sampler, policy, Q-functions), and drain
    the algorithm's training generator.

    Side effects: seeds RNGs, creates and registers a TF session with Keras,
    configures the RLA ``tester`` logger, and prints debug information.
    """
    import sys
    example_args = get_parser().parse_args(sys.argv[1:])

    variant_spec = get_variant_spec(example_args)
    command_line_args = example_args
    # BUGFIX: message previously read 'vriant spec'.
    print('variant spec: {}'.format(variant_spec))
    params = variant_spec.get('algorithm_params')
    # NOTE(review): local_dir is never used below — looks like a leftover from
    # a ray/tune launcher. Kept because os.path.join raises on a missing
    # log_dir/domain, which acts as an early sanity check; confirm before removing.
    local_dir = os.path.join(params.get('log_dir'), params.get('domain'))

    # NOTE(review): resources_per_trial is likewise unused afterwards;
    # _normalize_trial_resources may validate the CLI values — confirm before removing.
    resources_per_trial = _normalize_trial_resources(
        command_line_args.resources_per_trial, command_line_args.trial_cpus,
        command_line_args.trial_gpus, command_line_args.trial_extra_cpus,
        command_line_args.trial_extra_gpus)
    experiment_id = params.get('exp_name')

    #### add pool_load_max_size to experiment_id (e.g. '<name>_100e3' for 100k)
    if 'pool_load_max_size' in variant_spec['algorithm_params']['kwargs']:
        max_size = variant_spec['algorithm_params']['kwargs'][
            'pool_load_max_size']
        experiment_id = '{}_{}e3'.format(experiment_id, int(max_size / 1000))
    ####

    variant_spec = add_command_line_args_to_variant_spec(
        variant_spec, command_line_args)

    if command_line_args.video_save_frequency is not None:
        assert 'algorithm_params' in variant_spec
        variant_spec['algorithm_params']['kwargs']['video_save_frequency'] = (
            command_line_args.video_save_frequency)

    variant = variant_spec
    # init: seed everything and register a growth-enabled TF session with Keras.
    set_seed(variant['run_params']['seed'])
    gpu_options = tf.GPUOptions(allow_growth=True)
    session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    tf.keras.backend.set_session(session)

    # build: deep-copy so later mutation cannot leak into the recorded spec.
    variant = copy.deepcopy(variant)

    tester.set_hyper_param(**variant)
    tester.add_record_param(['run_params.seed', 'info'])
    tester.configure(task_name='policy_learn',
                     private_config_path=os.path.join(get_package_path(),
                                                      'rla_config.yaml'),
                     run_file='main.py',
                     log_root=get_package_path())
    tester.log_files_gen()
    tester.print_args()

    environment_params = variant['environment_params']
    training_environment = (get_environment_from_params(
        environment_params['training']))
    # Fall back to the training env when no separate evaluation env is given.
    evaluation_environment = (get_environment_from_params(
        environment_params['evaluation'](variant)) if 'evaluation'
                              in environment_params else training_environment)

    replay_pool = (get_replay_pool_from_variant(variant, training_environment))
    sampler = get_sampler_from_variant(variant)
    Qs = get_Q_function_from_variant(variant, training_environment)
    policy = get_policy_from_variant(variant, training_environment, Qs)
    initial_exploration_policy = (get_policy('UniformPolicy',
                                             training_environment))

    #### get the domain-specific termination function
    domain = environment_params['training']['domain']
    static_fns = mopo.static[domain.lower()]
    ####
    print("[ DEBUG ] KWARGS: {}".format(variant['algorithm_params']['kwargs']))

    algorithm = get_algorithm_from_variant(
        variant=variant,
        training_environment=training_environment,
        evaluation_environment=evaluation_environment,
        policy=policy,
        initial_exploration_policy=initial_exploration_policy,
        Qs=Qs,
        pool=replay_pool,
        static_fns=static_fns,
        sampler=sampler,
        session=session)
    print('[ DEBUG ] finish construct model, start training')
    # train: algorithm.train() is a generator; list() drains it to completion.
    list(algorithm.train())