Example #1
    # Requires: import os, boto3; from s3fs import S3FileSystem;
    # from datasets import load_from_disk. Runs against moto's mocked S3
    # (e.g. with the test class decorated with moto's @mock_s3).
    def test_save_and_load_to_s3(self):
        # Mocked AWS credentials so boto3 and moto never touch a real account.
        os.environ["AWS_ACCESS_KEY_ID"] = "fake_access_key"
        os.environ["AWS_SECRET_ACCESS_KEY"] = "fake_secret_key"
        os.environ["AWS_SECURITY_TOKEN"] = "fake_secrurity_token"
        os.environ["AWS_SESSION_TOKEN"] = "fake_session_token"

        s3 = boto3.client("s3", region_name="us-east-1")
        mock_bucket = "moto-mock-s3-bucket"
        # We need to create the bucket since this is all in Moto's 'virtual' AWS account
        s3.create_bucket(Bucket=mock_bucket)
        dataset_path = f"s3://{mock_bucket}/datasets/dict"

        fs = S3FileSystem(key="fake_access_key", secret="fake_secret")

        dsets = self._create_dummy_dataset_dict()
        dsets.save_to_disk(dataset_path, fs=fs)

        del dsets

        dsets = load_from_disk(dataset_path, fs=fs)

        self.assertListEqual(sorted(dsets), ["test", "train"])
        self.assertEqual(len(dsets["train"]), 30)
        self.assertListEqual(dsets["train"].column_names, ["filename"])
        self.assertEqual(len(dsets["test"]), 30)
        self.assertListEqual(dsets["test"].column_names, ["filename"])
        del dsets
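
The `_create_dummy_dataset_dict` helper is defined elsewhere in the test suite. A minimal sketch consistent with the assertions above (two 30-row splits with a single "filename" column); the exact implementation is an assumption:

    def _create_dummy_dataset_dict(self):
        # Hypothetical helper: build a DatasetDict with "train" and "test"
        # splits of 30 rows each and a single "filename" column, matching
        # the assertions in test_save_and_load_to_s3.
        from datasets import Dataset, DatasetDict

        data = {"filename": [f"file_{i}.txt" for i in range(30)]}
        return DatasetDict({
            "train": Dataset.from_dict(data),
            "test": Dataset.from_dict(data),
        })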
Example #2
import fsspec
from s3fs import S3FileSystem

from datasets.filesystems import is_remote_filesystem


def test_is_remote_filesystem():
    # An s3fs filesystem is remote (its protocol is "s3").
    fs = S3FileSystem(key="fake_access_key", secret="fake_secret")

    is_remote = is_remote_filesystem(fs)
    assert is_remote is True

    # A local filesystem is not remote.
    fs = fsspec.filesystem("file")

    is_remote = is_remote_filesystem(fs)
    assert is_remote is False
Example #3
# Requires: from s3fs import S3FileSystem; from sagemaker.huggingface import HuggingFace.
# ROLE and distrilbert_script (the path to the training entry point) are
# constants defined elsewhere in the test suite.
def test_distilbert_base(docker_image, processor, instance_type,
                         sagemaker_local_session, py_version):
    from datasets import load_dataset
    from transformers import AutoTokenizer

    # tokenizer used in preprocessing
    tokenizer_name = 'distilbert-base-uncased'

    # dataset used
    dataset_name = 'imdb'

    # s3 key prefix for the data
    s3_prefix = 'samples/datasets/imdb'

    # download tokenizer
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)

    # tokenizer helper function
    def tokenize(batch):
        return tokenizer(batch['text'], padding='max_length', truncation=True)

    # load the train and test splits
    train_dataset, test_dataset = load_dataset(dataset_name,
                                               split=['train', 'test'])
    test_dataset = test_dataset.shuffle().select(
        range(100))  # shrink the test set to 100 examples to keep the test fast

    # tokenize dataset
    train_dataset = train_dataset.map(tokenize,
                                      batched=True,
                                      batch_size=len(train_dataset))
    test_dataset = test_dataset.map(tokenize,
                                    batched=True,
                                    batch_size=len(test_dataset))

    # rename the label column and set the format for pytorch
    train_dataset = train_dataset.rename_column("label", "labels")
    train_dataset.set_format('torch',
                             columns=['input_ids', 'attention_mask', 'labels'])
    test_dataset = test_dataset.rename_column("label", "labels")
    test_dataset.set_format('torch',
                            columns=['input_ids', 'attention_mask', 'labels'])

    # hyperparameters, which are passed into the training job
    hyperparameters = {
        'max_steps': 5,
        'train_batch_size': 4,
        'model_name': 'distilbert-base-uncased'
    }

    s3 = S3FileSystem()

    # save train_dataset to s3
    training_input_path = f's3://{sagemaker_local_session.default_bucket()}/{s3_prefix}/train'
    train_dataset.save_to_disk(training_input_path, fs=s3)

    # save test_dataset to s3
    test_input_path = f's3://{sagemaker_local_session.default_bucket()}/{s3_prefix}/test'
    test_dataset.save_to_disk(test_input_path, fs=s3)

    estimator = HuggingFace(entry_point=distrilbert_script,
                            instance_type='local_gpu',
                            sagemaker_session=sagemaker_local_session,
                            image_uri=docker_image,
                            instance_count=1,
                            role=ROLE,
                            py_version=py_version,
                            hyperparameters=hyperparameters)

    estimator.fit({
        'train': training_input_path,
        'test': test_input_path
    })