def _do_live_policy_checkpoint(trainer, training_iteration):
    """Checkpoint the live training policy and publish it to the population.

    Saves the train policy's weights to a timestamped .dill file on disk,
    mirrors that file to cloud storage, and then notifies the live table
    tracker of the new weights key (also requesting a matching locks
    checkpoint).
    """
    # In-process copy of the policy currently being trained.
    train_policy = trainer.workers.local_worker().policy_map[TRAIN_POLICY]

    # Timestamped weights file under the experiment's checkpoints dir.
    file_name = f"policy_{trainer.claimed_policy_num}_{datetime_str()}_iter_{training_iteration}.dill"
    local_path = os.path.join(experiment_save_dir, "policy_checkpoints",
                              file_name)
    train_policy.save_model_weights(save_file_path=local_path,
                                    remove_scope_prefix=TRAIN_POLICY)

    # Mirror the checkpoint to cloud storage so other workers can fetch it.
    remote_key = os.path.join(base_experiment_name, full_experiment_name,
                              "policy_checkpoints", file_name)
    client = connect_storage_client()
    upload_file(storage_client=client,
                bucket_name=BUCKET_NAME,
                object_key=remote_key,
                local_source_path=local_path)

    # Tell the population tracker where the newest weights live; it will
    # also checkpoint the lock state under this name.
    locks_name = f"dch_population_checkpoint_{datetime_str()}"
    ray_get_and_free(
        trainer.live_table_tracker.set_latest_key_for_claimed_policy.remote(
            new_key=remote_key,
            request_locks_checkpoint_with_name=locks_name))
# --- Example #2 (scraped-snippet boundary) ---
    def _checkpoint_payoff_table(self):
        """Serialize the payoff table to disk and mirror it to cloud storage.

        Two copies are uploaded: an immutable, uniquely-named snapshot
        (remembered as ``self._latest_payoff_table_key``) and an overwrite of
        the well-known ``self._latest_checkpoint_key`` so readers always have
        a stable address for the newest table.
        """
        # NOTE: "polices" in the key is a long-standing typo kept for
        # compatibility with existing stored keys.
        unique_key = os.path.join(
            self.payoff_table_save_key_prefix_dir,
            f"payoff_table_{self._payoff_table.size()}_polices_"
            f"{self._payoff_table.get_num_pending_policies()}_pending_"
            f"{gethostname()}_pid_{os.getpid()}_{datetime_str()}.dill")

        # Dump the table to the local path that mirrors the storage key.
        local_path = get_default_path_on_disk_for_minio_key(
            object_name=unique_key)
        ensure_dir(local_path)
        self._payoff_table.save_to_dill_file_(save_file_path=local_path)

        # Copy #1: immutable, uniquely-named snapshot.
        upload_file(storage_client=self._storage_client,
                    bucket_name=self._bucket_name,
                    object_key=unique_key,
                    local_source_path=local_path)
        self._latest_payoff_table_key = unique_key

        # Copy #2: overwrite the fixed "latest" key with the same file.
        upload_file(storage_client=self._storage_client,
                    bucket_name=self._bucket_name,
                    object_key=self._latest_checkpoint_key,
                    local_source_path=local_path)

        # When running against cloud storage, announce the new checkpoint on
        # the bulletin so other processes notice it.
        if CLOUD_PREFIX:
            post_key(storage_client=self._storage_client,
                     bucket_name=self._bucket_name,
                     key=self._latest_checkpoint_key,
                     bulletin_prefix=f"{CLOUD_PREFIX}bulletin")
# --- Example #3 (scraped-snippet boundary) ---
    def _save_checkpoint(self, checkpoint_name):
        """Dump the current lock table to disk and upload it to cloud storage.

        Uploads twice: once under a unique UTC-timestamped key and once under
        the fixed ``LATEST_CHECKPOINT_KEY``, then (for cloud runs) posts the
        latest key to the bulletin.
        """
        unique_key = (f"{CLOUD_PREFIX}lock_server_checkpoints/{WORKER_ID}/"
                      f"{checkpoint_name}_utc_{time.time()}.dill")
        local_path = get_default_path_on_disk_for_minio_key(unique_key)
        ensure_dir(local_path)

        # Serialize the in-memory lock table.
        with open(local_path, "wb+") as dill_file:
            dill.dump(self._item_locks, dill_file)

        # Unique, timestamped snapshot upload.
        upload_file(storage_client=self._storage_client,
                    bucket_name=self._bucket_name,
                    object_key=unique_key,
                    local_source_path=local_path)
        logger.info(
            colored(f"Saved locks checkpoint to {unique_key}", "magenta"))

        # Overwrite the well-known "latest" key with the same file.
        upload_file(storage_client=self._storage_client,
                    bucket_name=self._bucket_name,
                    object_key=LATEST_CHECKPOINT_KEY,
                    local_source_path=local_path)
        logger.info(
            colored(f"Also Saved locks checkpoint to {LATEST_CHECKPOINT_KEY}",
                    "magenta"))

        # Announce on the bulletin so other processes can discover it.
        if CLOUD_PREFIX:
            post_key(storage_client=self._storage_client,
                     bucket_name=self._bucket_name,
                     key=LATEST_CHECKPOINT_KEY,
                     bulletin_prefix=f"{CLOUD_PREFIX}bulletin")
# --- Example #4 (scraped-snippet boundary) ---
    def submit_new_policy(self,
                          policy_file_key,
                          policy_weights,
                          steps_trained,
                          extra_data: dict = None):
        """Cache, persist, and upload a policy's weights, then register the
        policy in the catalog if this key hasn't been seen before.

        policy_file_key: storage key the weights will live under.
        policy_weights: serializable weights object.
        steps_trained: training-step count recorded in the catalog entry.
        extra_data: optional extra metadata merged into the catalog entry.
        """
        self.add_to_cache(policy_key=policy_file_key, weights=policy_weights)

        # save policy weights to disk and cloud
        local_path = get_default_path_on_disk_for_minio_key(
            object_name=policy_file_key)
        ensure_dir(local_path)
        with open(local_path, "wb") as dill_file:
            dump(obj=policy_weights, file=dill_file)

        upload_file(self.storage_client,
                    bucket_name=self.bucket_name,
                    object_key=policy_file_key,
                    local_source_path=local_path)

        # First submission under this key: create a fresh catalog entry with
        # an unrated skill ranking, zero ranked games, and no mixture.
        if policy_file_key not in self.catalog:
            entry_data = {
                STEPS_TRAINED: steps_trained,
                TIME_CREATED: time.time() - self.start_time,
            }
            if extra_data is not None:
                entry_data.update(extra_data)
            self.catalog[policy_file_key] = (Rating(), entry_data, 0, None)
# --- Example #5 (scraped-snippet boundary) ---
        def stop_and_submit_if_not_improving_on_train_result_callback(params):
            """RLlib on_train_result callback: stop training and submit the
            train policy to the population manager when reward plateaus.

            Submission (and ``result['stop_signal'] = True``) triggers when
            either:
              * at a periodic threshold check, measured reward failed to
                improve by SUBMISSION_IMPROVEMENT_THRESHOLD_REWARD since the
                previous check (a plateau), or
              * SUBMISSION_MAX_TIMESTEPS has been reached.

            params: dict with 'trainer' (the RLlib trainer) and 'result'
                (the training-iteration result dict, mutated in place).
            """
            trainer = params['trainer']
            result = params['result']
            result['stop_signal'] = False

            should_submit = False
            submit_reason = None

            # Lazily attach plateau-tracking state to the trainer on the
            # first invocation of this callback.
            if not hasattr(trainer, 'previous_threshold_check_reward'):
                trainer.previous_threshold_check_reward = -100.0
                trainer.next_threshold_check_timesteps = SUBMISSION_IMPROVEMENT_THRESHOLD_PER_TIMESTEPS + SUBMISSION_THRESHOLD_STEPS_START
                # Fixed typo in the printed message ("fist" -> "first").
                print(
                    f"first threshold check at {trainer.next_threshold_check_timesteps} timesteps"
                )

            # Plateau detection: only active after the warm-up period and when
            # both improvement-threshold knobs are configured.
            if result['timesteps_total'] >= SUBMISSION_THRESHOLD_STEPS_START and \
                    SUBMISSION_IMPROVEMENT_THRESHOLD_PER_TIMESTEPS is not None and \
                    SUBMISSION_IMPROVEMENT_THRESHOLD_REWARD is not None:

                if result[
                        'timesteps_total'] >= trainer.next_threshold_check_timesteps:
                    # Schedule the next check strictly in the future, even if
                    # training overshot several check intervals at once.
                    trainer.next_threshold_check_timesteps = max(
                        trainer.next_threshold_check_timesteps +
                        SUBMISSION_IMPROVEMENT_THRESHOLD_PER_TIMESTEPS,
                        result['timesteps_total'] + 1)

                    target_reward = trainer.previous_threshold_check_reward + SUBMISSION_IMPROVEMENT_THRESHOLD_REWARD
                    result['target_reward'] = target_reward
                    measured_reward = result['policy_reward_mean'][
                        TRAIN_POLICY]
                    print(
                        f"{result['timesteps_total']} timesteps: {TRAIN_POLICY} reward: {measured_reward}, target reward: {target_reward}"
                    )

                    # Plateaued — but only submit once past the minimum
                    # timesteps floor (if one is configured).
                    if measured_reward < target_reward and \
                            (SUBMISSION_MIN_TIMESTEPS is None or result['timesteps_total'] >= SUBMISSION_MIN_TIMESTEPS):
                        should_submit = True
                        submit_reason = f"plateaued at {measured_reward} reward"
                        print(
                            f"{result['timesteps_total']} timesteps: {TRAIN_POLICY} didn't reach target reward. Submitting policy."
                        )
                    else:
                        print(
                            f"next threshold check at {trainer.next_threshold_check_timesteps} timesteps"
                        )

                    trainer.previous_threshold_check_reward = measured_reward

            # Hard cap: always submit once the max-timesteps budget is spent.
            if SUBMISSION_MAX_TIMESTEPS is not None and result[
                    'timesteps_total'] >= SUBMISSION_MAX_TIMESTEPS:
                should_submit = True
                submit_reason = f"hit max timesteps of {SUBMISSION_MAX_TIMESTEPS}"
                print("Trainer hit max timesteps. Submitting policy.")

            if should_submit:
                assert submit_reason is not None
                result['stop_signal'] = True
                local_train_policy = trainer.workers.local_worker(
                ).policy_map[TRAIN_POLICY]

                # Tags record why and when this policy was submitted.
                tags = [
                    *SUBMISSION_POLICY_TAGS, submit_reason,
                    f"timesteps: {result['timesteps_total']}",
                    f"episodes: {result['episodes_total']}"
                ]
                if hasattr(local_train_policy, "init_tag"):
                    tags += local_train_policy.init_tag

                # Save weights locally, then mirror them to cloud storage so
                # the population manager can fetch them by key.
                checkpoints_dir = os.path.join(experiment_save_dir,
                                               "policy_submissions")
                checkpoint_name = f"{datetime_str()}_iter_{result['training_iteration']}.dill"
                checkpoint_save_path = os.path.join(checkpoints_dir,
                                                    checkpoint_name)
                local_train_policy.save_model_weights(
                    save_file_path=checkpoint_save_path,
                    remove_scope_prefix=TRAIN_POLICY)
                policy_key = os.path.join(base_experiment_name,
                                          full_experiment_name,
                                          "policy_submissions",
                                          checkpoint_name)
                storage_client = connect_storage_client()
                upload_file(storage_client=storage_client,
                            bucket_name=BUCKET_NAME,
                            object_key=policy_key,
                            local_source_path=checkpoint_save_path)
                trainer.manager_interface.submit_new_policy_for_population(
                    policy_weights_key=policy_key,
                    policy_config_key=TRAIN_POLICY_MODEL_CONFIG_KEY,
                    policy_class_name=TRAIN_POLICY_CLASS.__name__,
                    policy_tags=tags)
                # --- Example #6 (scraped-snippet boundary; the stray
                # "100, }" residue here was the tail of a truncated
                # enclosing config dict lost in extraction) ---
    # Build the trainer so its logdir (and the policy config JSON it writes
    # there) exists before we upload the config.
    trainer = TRAINER_CLASS(config=trainer_config)

    # Guard against clobbering a shared key: require two interactive
    # confirmations before overwriting an existing model-config object.
    if key_exists(storage_client=storage_client,
                  bucket_name="stratego",
                  object_name=POKER_ARCH1_MODEL_CONFIG_KEY):
        response = input(
            f"\n\nThe key {POKER_ARCH1_MODEL_CONFIG_KEY} already exists. "
            f"\nDo you really want to override it?"
            f"\nOther programs may already by using this key."
            f"\nEnter 'y' to overwrite it.\n")
        if response != 'y':
            print("exiting...")
            exit(0)
        response = input(
            f"Are you REALLY sure you want to overwrite {POKER_ARCH1_MODEL_CONFIG_KEY}?????"
            f"\nEnter 'y' to overwrite it (last warning).\n")
        if response != 'y':
            print("exiting...")
            exit(0)

    # Upload the train policy's model config JSON (written by the trainer
    # into its logdir) under the well-known key.
    upload_file(storage_client=storage_client,
                bucket_name="stratego",
                object_key=POKER_ARCH1_MODEL_CONFIG_KEY,
                local_source_path=os.path.join(trainer._logdir,
                                               f"{TRAIN_POLICY}_config.json"))

    logger.info("DONE!")
# --- Example #7 (scraped-snippet boundary) ---
        def submit_ocassionaly_on_train_result_callback(params):
            """RLlib on_train_result callback: periodically submit the train
            policy to the population manager (no stopping, no reward check).

            After an initial SUBMISSION_THRESHOLD_STEPS_START warm-up, the
            policy is submitted roughly every
            SUBMISSION_IMPROVEMENT_THRESHOLD_PER_STEPS timesteps, provided
            SUBMISSION_MIN_STEPS (if set) has been reached.

            Note: the misspelled name ("ocassionaly") is kept because callers
            reference this function by name.
            """
            trainer = params['trainer']
            result = params['result']

            should_submit = False
            submit_reason = None

            # First call: schedule the first periodic submission.
            if not hasattr(trainer, 'next_submit'):
                trainer.next_submit = SUBMISSION_IMPROVEMENT_THRESHOLD_PER_STEPS + SUBMISSION_THRESHOLD_STEPS_START

            if result['timesteps_total'] >= trainer.next_submit:
                # NOTE(review): each period re-adds
                # SUBMISSION_THRESHOLD_STEPS_START on top of the per-steps
                # interval (unlike the plateau callback) — confirm this
                # ever-widening interval is intentional.
                trainer.next_submit = max(
                    trainer.next_submit +
                    SUBMISSION_IMPROVEMENT_THRESHOLD_PER_STEPS +
                    SUBMISSION_THRESHOLD_STEPS_START,
                    result['timesteps_total'] + 1)

                if SUBMISSION_MIN_STEPS is None or result[
                        'timesteps_total'] >= SUBMISSION_MIN_STEPS:
                    should_submit = True
                    submit_reason = "periodic_checkpoint"
                    # Fixed: the previous message (copy-pasted from the
                    # plateau callback) wrongly claimed a target reward was
                    # missed; this callback submits on a timer.
                    print(
                        colored(
                            f"{result['timesteps_total']} steps: submitting {TRAIN_POLICY} (periodic checkpoint).",
                            "white"))
                else:
                    print(
                        colored(f"next submit at {trainer.next_submit} steps",
                                "white"))

            if should_submit:
                assert submit_reason is not None
                local_train_policy = trainer.workers.local_worker(
                ).policy_map[TRAIN_POLICY]

                # Tags record why and when this policy was submitted.
                tags = [
                    *SUBMISSION_POLICY_TAGS, submit_reason,
                    f"timesteps: {result['timesteps_total']}",
                    f"episodes: {result['episodes_total']}",
                    f"iter: {result['training_iteration']}"
                ]
                if hasattr(local_train_policy, "init_tag"):
                    tags += local_train_policy.init_tag

                # Save weights locally, then mirror them to cloud storage so
                # the population manager can fetch them by key.
                checkpoints_dir = os.path.join(experiment_save_dir,
                                               "policy_submissions")
                checkpoint_name = f"{datetime_str()}_iter_{result['training_iteration']}.dill"
                checkpoint_save_path = os.path.join(checkpoints_dir,
                                                    checkpoint_name)
                local_train_policy.save_model_weights(
                    save_file_path=checkpoint_save_path,
                    remove_scope_prefix=TRAIN_POLICY)
                policy_key = os.path.join(base_experiment_name,
                                          full_experiment_name,
                                          "policy_submissions",
                                          checkpoint_name)
                storage_client = connect_storage_client()
                upload_file(storage_client=storage_client,
                            bucket_name=BUCKET_NAME,
                            object_key=policy_key,
                            local_source_path=checkpoint_save_path)
                trainer.manager_interface.submit_new_policy_for_population(
                    policy_weights_key=policy_key,
                    policy_config_key=TRAIN_POLICY_MODEL_CONFIG_KEY,
                    policy_class_name=TRAIN_POLICY_CLASS.__name__,
                    policy_tags=tags)