# create a mutable local copy of the dataset that we can modify
dataset_folder = dataset.get_mutable_local_copy(
    target_folder='/Users/guardi/MSCA/MLOps/ClearML/working_dataset',
    overwrite=True)
print(f"dataset_folder: {dataset_folder}")

df = pd.read_csv(dataset_folder + '/transformed_dataset.csv')

# features
X = df[[
    'GDP per capita',
    'Social support',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption',
    'Social Generosity'
]]
# target
y = df['Healthy life expectancy']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# store the dataset split into a pickle file
with open(dataset_folder + '/transformed_train.pkl', 'wb') as f:
    pickle.dump([X_train, X_test, y_train, y_test], f)

# create a new version of the dataset with the pickle file
new_dataset = Dataset.create(dataset_project='assignment1',
                             dataset_name='transformed_data_split',
                             parent_datasets=[dataset])
new_dataset.sync_folder(local_path=dataset_folder)
new_dataset.upload()
new_dataset.finalize()

print('we are done')
# create a mutable local copy of the dataset that we can modify
dataset_folder = dataset.get_mutable_local_copy(
    target_folder='/Users/guardi/MSCA/MLOps/ClearML/working_dataset',
    overwrite=True)
print(f"dataset_folder: {dataset_folder}")

df = pd.read_csv(dataset_folder + '/clean_data.csv')

# features
X = df[[
    'GDP per capita',
    'Social support',
    'Freedom to make life choices',
    'Generosity',
    'Perceptions of corruption'
]]
# target
y = df['Healthy life expectancy']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# store the dataset split into a pickle file
with open(dataset_folder + '/clean_train.pkl', 'wb') as f:
    pickle.dump([X_train, X_test, y_train, y_test], f)

# create a new version of the dataset with the pickle file
new_dataset = Dataset.create(dataset_project='assignment1',
                             dataset_name='clean_data_split',
                             parent_datasets=[dataset])
new_dataset.sync_folder(local_path=dataset_folder)
new_dataset.upload()
new_dataset.finalize()

print('we are done')
# create a copy that we can change
dataset_folder = dataset.get_mutable_local_copy(
    target_folder='working_dataset',
    overwrite=True)
print(f"dataset_folder: {dataset_folder}")

# open the dataset pickle file
with open(dataset_folder + '/iris_dataset.pkl', 'rb') as f:
    iris = pickle.load(f)

# "process" data (i.e. we split it into train/test)
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# store the dataset split into a pickle file
with open(dataset_folder + '/iris_dataset.pkl', 'wb') as f:
    pickle.dump([X_train, X_test, y_train, y_test], f)

# create a new version of the dataset with the pickle file
new_dataset = Dataset.create(dataset_project='uchicago',
                             dataset_name='dataset2',
                             parent_datasets=[dataset])
new_dataset.sync_folder(local_path=dataset_folder)
new_dataset.upload()
new_dataset.finalize()

print('we are done')
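For context, a downstream task would typically pull this new version back down by project and dataset name. The sketch below is not part of the original script; it assumes the same 'uchicago' / 'dataset2' names used above and relies only on the standard Dataset.get() and get_local_copy() calls.

# Sketch of a downstream consumer (assumed usage, not from the original script):
# fetch the finalized split by project/name and load the pickled arrays.
import pickle
from clearml import Dataset

dataset2 = Dataset.get(dataset_project='uchicago', dataset_name='dataset2')
local_folder = dataset2.get_local_copy()  # read-only cached copy

with open(local_folder + '/iris_dataset.pkl', 'rb') as f:
    X_train, X_test, y_train, y_test = pickle.load(f)

print(X_train.shape, X_test.shape)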
parser.add_argument(
    '--clearml-project',
    dest='clearml_project',
    type=str,
    help='The name of the ClearML project that the dataset will be stored and published to.',
    default='Caltech Birds/Datasets')
parser.add_argument(
    '--clearml-dataset-url',
    dest='clearml_dataset_url',
    type=str,
    help='Location where the dataset files should be stored. Default is Azure Blob Storage. Format is azure://storage_account/container',
    default='azure://clearmllibrary/datasets')
args = parser.parse_args()

for task_type in ['train', 'test']:
    print('[INFO] Versioning and uploading {0} dataset for CUB200 2011'.format(task_type))

    dataset = Dataset.create('cub200_2011_{0}_dataset'.format(task_type),
                             dataset_project=args.clearml_project)

    dataset.add_files(path=os.path.join(args.dataset_basedir, task_type), verbose=False)

    dataset.upload(output_url=args.clearml_dataset_url)

    print('[INFO] {0} Dataset finalized....'.format(task_type), end='')
    dataset.finalize()
    print('done.')

    print('[INFO] {0} Dataset published....'.format(task_type), end='')
    dataset.publish()
    print('done.')
# Download CIFAR dataset and create a dataset with ClearML's Dataset class
from clearml import StorageManager, Dataset

manager = StorageManager()

dataset_path = manager.get_local_copy(
    remote_url="https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz")

dataset = Dataset.create(dataset_name="cifar_dataset",
                         dataset_project="dataset_examples")

# Prepare and clean data here before it is added to the dataset

dataset.add_files(path=dataset_path)

# Dataset is uploaded to the ClearML Server by default
dataset.upload()
dataset.finalize()
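As an optional sanity check (an assumed addition, not part of the example above), the files registered in the version can be listed with the standard list_files() call before or after finalizing:

# Assumed sanity check: confirm what was registered in this dataset version
registered = dataset.list_files()
for file_name in registered:
    print(' -', file_name)
print(f'{len(registered)} file(s) registered')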
if cfg.delete_earlier_versions:
    for t in test_if_exists:
        try:
            Dataset.delete(t['id'])
            print(f'Deleted {t}')
        except ValueError:
            print('Could not delete dataset - has children?')

print(f'Now with {dataset_name}, creating!')

if cfg.use_lineage:
    new_dataset = Dataset.create(
        dataset_name=dataset_name + stage,
        dataset_project=project_name,
        parent_datasets=[cfg.input_dataset_id]
    )
print('...Done')

# remove other sizes
for other_folder_rel in all_subfolders_rel:
    if other_folder_rel != rel_folder:
        new_dataset.remove_files(str(other_folder_rel) + "/*", verbose=False)

# remove other stages
for not_stage in ['train', 'val', 'test']:
    if not_stage != stage:
        new_dataset.remove_files(str(rel_folder / not_stage) + "/*", verbose=False)

# upload should be a no-op in this case
rmed = new_dataset.list_removed_files(cfg.input_dataset_id)