def _save_to_store(self, checkpoint_dir):
        """
        save_to_store() uploads the policy checkpoint, gifs and videos to the S3 data store. It reads the checkpoint state files and
        uploads only the latest checkpoint files to S3. It is used by the trainer in Coach when used in the distributed mode.
        """
        try:
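            # The lock object marks an upload in progress in the bucket; it is created before the
            # checkpoint files are written and removed again once they have all been uploaded.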
            # remove lock file if it exists
            self.mc.remove_object(self.params.bucket_name, SyncFiles.LOCKFILE.value)

            # Acquire lock
            self.mc.put_object(self.params.bucket_name, SyncFiles.LOCKFILE.value, io.BytesIO(b''), 0)

            state_file = CheckpointStateFile(os.path.abspath(checkpoint_dir))
            if state_file.exists():
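                # Only files whose names start with the checkpoint name recorded in the
                # checkpoint state file (i.e. the latest checkpoint) are uploaded.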
                ckpt_state = state_file.read()
                checkpoint_file = None
                for root, dirs, files in os.walk(checkpoint_dir):
                    for filename in files:
                        if filename == CheckpointStateFile.checkpoint_state_filename:
                            checkpoint_file = (root, filename)
                            continue
                        if filename.startswith(ckpt_state.name):
                            abs_name = os.path.abspath(os.path.join(root, filename))
                            rel_name = os.path.relpath(abs_name, checkpoint_dir)
                            self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)

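                # Upload the checkpoint state file last, once all of its checkpoint data files are in place.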
                abs_name = os.path.abspath(os.path.join(checkpoint_file[0], checkpoint_file[1]))
                rel_name = os.path.relpath(abs_name, checkpoint_dir)
                self.mc.fput_object(self.params.bucket_name, rel_name, abs_name)

            # upload Finished if present
            if os.path.exists(os.path.join(checkpoint_dir, SyncFiles.FINISHED.value)):
                self.mc.put_object(self.params.bucket_name, SyncFiles.FINISHED.value, io.BytesIO(b''), 0)

            # upload Ready if present
            if os.path.exists(os.path.join(checkpoint_dir, SyncFiles.TRAINER_READY.value)):
                self.mc.put_object(self.params.bucket_name, SyncFiles.TRAINER_READY.value, io.BytesIO(b''), 0)

            # release lock
            self.mc.remove_object(self.params.bucket_name, SyncFiles.LOCKFILE.value)

            if self.params.expt_dir and os.path.exists(self.params.expt_dir):
                for filename in os.listdir(self.params.expt_dir):
                    if filename.endswith((".csv", ".json")):
                        self.mc.fput_object(self.params.bucket_name, filename, os.path.join(self.params.expt_dir, filename))

            if self.params.expt_dir and os.path.exists(os.path.join(self.params.expt_dir, 'videos')):
                for filename in os.listdir(os.path.join(self.params.expt_dir, 'videos')):
                    self.mc.fput_object(self.params.bucket_name, filename, os.path.join(self.params.expt_dir, 'videos', filename))

            if self.params.expt_dir and os.path.exists(os.path.join(self.params.expt_dir, 'gifs')):
                for filename in os.listdir(os.path.join(self.params.expt_dir, 'gifs')):
                    self.mc.fput_object(self.params.bucket_name, filename, os.path.join(self.params.expt_dir, 'gifs', filename))

        except ResponseError as e:
            print("Got exception: %s\n while saving to S3", e)

    def save_to_store(self):
        """
        save_to_store() uploads the latest checkpoint files, sync files, and the frozen inference graph
        to the per-agent S3 buckets, then prunes the oldest uploaded checkpoints once more than
        NUM_MODELS_TO_KEEP are tracked, never deleting the best checkpoint.
        """
        try:
            s3_client = self._get_client()
            base_checkpoint_dir = self.params.base_checkpoint_dir
            for agent_key, bucket in self.params.buckets.items():
                # remove lock file if it exists
                s3_client.delete_object(Bucket=bucket, Key=self._get_s3_key(SyncFiles.LOCKFILE.value, agent_key))

                # acquire lock
                s3_client.upload_fileobj(Fileobj=io.BytesIO(b''),
                                         Bucket=bucket,
                                         Key=self._get_s3_key(SyncFiles.LOCKFILE.value, agent_key))

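                # Single-agent training keeps its checkpoint files directly in base_checkpoint_dir;
                # multi-agent training uses one sub-directory per agent.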
                checkpoint_dir = base_checkpoint_dir if len(self.graph_manager.agents_params) == 1 else \
                    os.path.join(base_checkpoint_dir, agent_key)

                state_file = CheckpointStateFile(os.path.abspath(checkpoint_dir))
                ckpt_state = None
                check_point_key_list = []
                if state_file.exists():
                    ckpt_state = state_file.read()
                    checkpoint_file = None
                    num_files_uploaded = 0
                    start_time = time.time()
                    for root, _, files in os.walk(checkpoint_dir):
                        for filename in files:
                            if filename == CheckpointStateFile.checkpoint_state_filename:
                                checkpoint_file = (root, filename)
                                continue
                            if filename.startswith(ckpt_state.name):
                                abs_name = os.path.abspath(os.path.join(root, filename))
                                rel_name = os.path.relpath(abs_name, checkpoint_dir)
                                s3_client.upload_file(Filename=abs_name,
                                                      Bucket=bucket,
                                                      Key=self._get_s3_key(rel_name, agent_key),
                                                      Config=boto3.s3.transfer.TransferConfig(multipart_threshold=1))
                                check_point_key_list.append(self._get_s3_key(rel_name, agent_key))
                                num_files_uploaded += 1
                    time_taken = time.time() - start_time
                    LOG.info("Uploaded %s files for checkpoint %s in %.2f seconds", num_files_uploaded, ckpt_state.num, time_taken)
                    if check_point_key_list:
                        self.delete_queues[agent_key].put(check_point_key_list)

                    abs_name = os.path.abspath(os.path.join(checkpoint_file[0], checkpoint_file[1]))
                    rel_name = os.path.relpath(abs_name, checkpoint_dir)
                    s3_client.upload_file(Filename=abs_name,
                                          Bucket=bucket,
                                          Key=self._get_s3_key(rel_name, agent_key))

                # upload Finished if present
                if os.path.exists(os.path.join(checkpoint_dir, SyncFiles.FINISHED.value)):
                    s3_client.upload_fileobj(Fileobj=io.BytesIO(b''),
                                             Bucket=bucket,
                                             Key=self._get_s3_key(SyncFiles.FINISHED.value, agent_key))

                # upload Ready if present
                if os.path.exists(os.path.join(checkpoint_dir, SyncFiles.TRAINER_READY.value)):
                    s3_client.upload_fileobj(Fileobj=io.BytesIO(b''),
                                             Bucket=bucket,
                                             Key=self._get_s3_key(SyncFiles.TRAINER_READY.value, agent_key))

                # release lock
                s3_client.delete_object(Bucket=bucket,
                                        Key=self._get_s3_key(SyncFiles.LOCKFILE.value, agent_key))

                # Upload the frozen graph which is used for deployment
                if self.graph_manager:
                    # The checkpoint state is always present for the checkpoint dir passed in.
                    # The same assumption is made when retrieving the best checkpoint in s3_metrics.
                    checkpoint_num = ckpt_state.num
                    self.write_frozen_graph(self.graph_manager, agent_key, checkpoint_num)
                    frozen_name = "model_{}.pb".format(checkpoint_num)
                    frozen_graph_fpath = os.path.join(SM_MODEL_PB_TEMP_FOLDER, agent_key,
                                                      frozen_name)
                    frozen_graph_s3_name = frozen_name if len(self.graph_manager.agents_params) == 1 \
                        else os.path.join(agent_key, frozen_name)
                    # upload the model_<ID>.pb to S3.
                    s3_client.upload_file(Filename=frozen_graph_fpath,
                                          Bucket=bucket,
                                          Key=self._get_s3_key(frozen_graph_s3_name, agent_key))
                    LOG.info("saved intermediate frozen graph: %s", self._get_s3_key(frozen_graph_s3_name, agent_key))

                    # Copy the best checkpoint to the SM_MODEL_OUTPUT_DIR
                    copy_best_frozen_model_to_sm_output_dir(bucket,
                                                            self.params.s3_folders[agent_key],
                                                            self.params.aws_region,
                                                            os.path.join(SM_MODEL_PB_TEMP_FOLDER, agent_key),
                                                            os.path.join(SM_MODEL_OUTPUT_DIR, agent_key),
                                                            self.params.s3_endpoint_url)

                # Clean up old checkpoints
                if ckpt_state and self.delete_queues[agent_key].qsize() > NUM_MODELS_TO_KEEP:
                    best_checkpoint = get_best_checkpoint(bucket,
                                                          self.params.s3_folders[agent_key],
                                                          self.params.aws_region,
                                                          self.params.s3_endpoint_url)
                    while self.delete_queues[agent_key].qsize() > NUM_MODELS_TO_KEEP:
                        key_list = self.delete_queues[agent_key].get()
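                        # If the oldest tracked checkpoint is the current best, re-enqueue it instead of
                        # deleting it so the best checkpoint is never removed.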
                        if best_checkpoint and all(best_checkpoint in os.path.split(file)[-1] for file in key_list):
                            self.delete_queues[agent_key].put(key_list)
                        else:
                            delete_iteration_ids = set()
                            for key in key_list:
                                s3_client.delete_object(Bucket=bucket, Key=key)
                                # Get the name of the file in the checkpoint directory that has to be deleted
                                # and extract the iteration id out of the name
                                file_in_checkpoint_dir = os.path.split(key)[-1]
                                if len(file_in_checkpoint_dir.split("_Step")) > 0:
                                    delete_iteration_ids.add(file_in_checkpoint_dir.split("_Step")[0])
                            LOG.info("Deleting the frozen models in s3 for the iterations: %s",
                                     delete_iteration_ids)
                            # Delete the model_{}.pb files from the s3 bucket for the previous iterations
                            for iteration_id in list(delete_iteration_ids):
                                frozen_name = "model_{}.pb".format(iteration_id)
                                frozen_graph_s3_name = frozen_name if len(self.graph_manager.agents_params) == 1 \
                                    else os.path.join(agent_key, frozen_name)
                                s3_client.delete_object(Bucket=bucket,
                                                        Key=self._get_s3_key(frozen_graph_s3_name, agent_key))
        except botocore.exceptions.ClientError:
            log_and_exit("Unable to upload checkpoint",
                         SIMAPP_S3_DATA_STORE_EXCEPTION,
                         SIMAPP_EVENT_ERROR_CODE_400)
        except Exception:
            log_and_exit("Unable to upload checkpoint",
                         SIMAPP_S3_DATA_STORE_EXCEPTION,
                         SIMAPP_EVENT_ERROR_CODE_500)

    def save_to_store(self):
        """
        save_to_store() uploads the latest checkpoint files, sync files, and the frozen inference graph
        to the S3 bucket, then deletes the checkpoint files that have fallen NUM_MODELS_TO_KEEP
        checkpoints behind the latest one.
        """
        try:
            s3_client = self._get_client()
            checkpoint_dir = self.params.checkpoint_dir

            # remove lock file if it exists
            s3_client.delete_object(Bucket=self.params.bucket, Key=self._get_s3_key(SyncFiles.LOCKFILE.value))

            # acquire lock
            s3_client.upload_fileobj(Fileobj=io.BytesIO(b''),
                                     Bucket=self.params.bucket,
                                     Key=self._get_s3_key(SyncFiles.LOCKFILE.value))

            state_file = CheckpointStateFile(os.path.abspath(checkpoint_dir))
            ckpt_state = None
            if state_file.exists():
                ckpt_state = state_file.read()
                checkpoint_file = None
                num_files_uploaded = 0
                for root, _, files in os.walk(checkpoint_dir):
                    for filename in files:
                        if filename == CheckpointStateFile.checkpoint_state_filename:
                            checkpoint_file = (root, filename)
                            continue
                        if filename.startswith(ckpt_state.name):
                            abs_name = os.path.abspath(os.path.join(root, filename))
                            rel_name = os.path.relpath(abs_name, checkpoint_dir)
                            s3_client.upload_file(Filename=abs_name,
                                                  Bucket=self.params.bucket,
                                                  Key=self._get_s3_key(rel_name))
                            num_files_uploaded += 1
                logger.info("Uploaded {} files for checkpoint {}".format(num_files_uploaded, ckpt_state.num))

                abs_name = os.path.abspath(os.path.join(checkpoint_file[0], checkpoint_file[1]))
                rel_name = os.path.relpath(abs_name, checkpoint_dir)
                s3_client.upload_file(Filename=abs_name,
                                      Bucket=self.params.bucket,
                                      Key=self._get_s3_key(rel_name))

            # upload Finished if present
            if os.path.exists(os.path.join(checkpoint_dir, SyncFiles.FINISHED.value)):
                s3_client.upload_fileobj(Fileobj=io.BytesIO(b''),
                                         Bucket=self.params.bucket,
                                         Key=self._get_s3_key(SyncFiles.FINISHED.value))

            # upload Ready if present
            if os.path.exists(os.path.join(checkpoint_dir, SyncFiles.TRAINER_READY.value)):
                s3_client.upload_fileobj(Fileobj=io.BytesIO(b''),
                                         Bucket=self.params.bucket,
                                         Key=self._get_s3_key(SyncFiles.TRAINER_READY.value))

            # release lock
            s3_client.delete_object(Bucket=self.params.bucket, Key=self._get_s3_key(SyncFiles.LOCKFILE.value))

            # Upload the frozen graph which is used for deployment
            if self.graph_manager:
                self.write_frozen_graph(self.graph_manager)
                # upload the model_<ID>.pb to S3. NOTE: there's no cleanup as we don't know the best checkpoint
                for agent_params in self.graph_manager.agents_params:
                    iteration_id = self.graph_manager.level_managers[0].agents[agent_params.name].training_iteration
                    frozen_graph_fpath = os.path.join(SM_MODEL_OUTPUT_DIR, agent_params.name, "model.pb")
                    frozen_name = "model_{}.pb".format(iteration_id)
                    frozen_graph_s3_name = frozen_name if len(self.graph_manager.agents_params) == 1 \
                        else os.path.join(agent_params.name, frozen_name)
                    s3_client.upload_file(Filename=frozen_graph_fpath,
                                          Bucket=self.params.bucket,
                                          Key=self._get_s3_key(frozen_graph_s3_name))
                    logger.info("saved intermediate frozen graph: {}".format(self._get_s3_key(frozen_graph_s3_name)))

            # Clean up old checkpoints
            if ckpt_state:
                checkpoint_number_to_delete = ckpt_state.num - NUM_MODELS_TO_KEEP
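                # Each save removes only the checkpoint that has fallen NUM_MODELS_TO_KEEP
                # checkpoints behind the latest one.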

                # List all the old checkpoint files to be deleted
                response = s3_client.list_objects_v2(Bucket=self.params.bucket,
                                                     Prefix=self._get_s3_key(""))
                if "Contents" in response:
                    for obj in response["Contents"]:
                        _, basename = os.path.split(obj["Key"])
                        if basename.startswith("{}_".format(checkpoint_number_to_delete)):
                            s3_client.delete_object(Bucket=self.params.bucket,
                                                    Key=obj["Key"])

        except botocore.exceptions.ClientError as e:
            utils.json_format_logger("Unable to upload checkpoint to {}, {}"
                                     .format(self.params.bucket, e.response['Error']['Code']),
                                     **utils.build_user_error_dict(utils.SIMAPP_S3_DATA_STORE_EXCEPTION,
                                                                   utils.SIMAPP_EVENT_ERROR_CODE_400))
            utils.simapp_exit_gracefully()
        except Exception as e:
            utils.json_format_logger("Unable to upload checkpoint to {}, {}"
                                     .format(self.params.bucket, e),
                                     **utils.build_system_error_dict(utils.SIMAPP_S3_DATA_STORE_EXCEPTION,
                                                                     utils.SIMAPP_EVENT_ERROR_CODE_500))
            utils.simapp_exit_gracefully()