def process_cellenone_dataset(dataset, storage_name, tag_name=None, update=False, remote_storage_name=None):
    assert len(dataset['libraries']) == 1
    library_id = dataset['libraries'][0]['library_id']

    tantalus_api = TantalusApi()

    if not tantalus_api.is_dataset_on_storage(dataset['id'], 'resultsdataset', storage_name):
        raise ValueError(
            f"dataset {dataset['id']} not on storage {storage_name}")

    # Assume all files in the raw dataset are under the directory:
    # single_cell_indexing/Cellenone/Cellenone_images/{date}_{library_id}
    filename_prefix = 'single_cell_indexing/Cellenone/Cellenone_images/'

    source_dir = None
    for file_resource in tantalus_api.get_dataset_file_resources(
            dataset['id'], 'resultsdataset'):
        if source_dir is None:
            if not file_resource['filename'].startswith(filename_prefix):
                raise ValueError(
                    f"file {file_resource['filename']} is not in directory {filename_prefix}")

            library_subdir = file_resource['filename'].split('/')[3]
            if not library_subdir.endswith(library_id):
                raise ValueError(
                    f"file {file_resource['filename']} is not in a directory ending with {library_id}")

            source_dir = '/'.join(file_resource['filename'].split('/')[:4])

        elif not file_resource['filename'].startswith(source_dir):
            raise ValueError(
                f"file {file_resource['filename']} is not in directory {source_dir}")

    assert source_dir is not None

    source_dir = tantalus_api.get_filepath(storage_name, source_dir)

    process_cellenone_images(
        library_id,
        source_dir,
        storage_name,
        tag_name=tag_name,
        update=update,
        remote_storage_name=remote_storage_name,
    )
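
# A minimal usage sketch for process_cellenone_dataset, kept commented out so it
# does not run on import. The dataset id, storage names and tag name below are
# placeholders, and it assumes the TantalusApi client exposes
# get('resultsdataset', id=...) as used elsewhere in this codebase.
#
#     tantalus_api = TantalusApi()
#     dataset = tantalus_api.get('resultsdataset', id=1234)
#     process_cellenone_dataset(
#         dataset,
#         'local_results_storage',
#         tag_name='cellenone_processed',
#         update=False,
#         remote_storage_name='remote_results_storage',
#     )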
def main(
        storage_name,
        dataset_type,
        dataset_id=None,
        tag_name=None,
        check_remote=None,
        dry_run=False,
):
    logging.info('cleaning up storage {}'.format(storage_name))

    if check_remote:
        logging.info('checking remote {}'.format(check_remote))
    else:
        logging.warning('not checking remote')

    tantalus_api = TantalusApi()

    storage_client = tantalus_api.get_storage_client(storage_name)

    remote_client = None
    if check_remote is not None:
        remote_client = tantalus_api.get_storage_client(check_remote)

    if dataset_id is None and tag_name is None:
        raise ValueError('require either dataset id or tag name')

    if dataset_id is not None and tag_name is not None:
        raise ValueError('require exactly one of dataset id or tag name')

    if dataset_id is not None:
        logging.info('cleaning up dataset {}, {}'.format(dataset_id, dataset_type))
        datasets = tantalus_api.list(dataset_type, id=dataset_id)

    if tag_name is not None:
        logging.info('cleaning up tag {}'.format(tag_name))
        datasets = tantalus_api.list(dataset_type, tags__name=tag_name)

    total_data_size = 0
    file_num_count = 0

    for dataset in datasets:
        logging.info('checking dataset with id {}, name {}'.format(
            dataset['id'], dataset['name']))

        # Optionally skip datasets not present and intact on the remote storage
        if check_remote is not None:
            if not tantalus_api.is_dataset_on_storage(
                    dataset['id'], dataset_type, check_remote):
                logging.warning(
                    'not deleting dataset with id {}, not on remote storage {}'.format(
                        dataset['id'], check_remote))
                continue

            # For each file instance on the remote, check if it exists and has the correct size in tantalus
            remote_file_size_check = True
            for file_instance in tantalus_api.get_dataset_file_instances(
                    dataset['id'], dataset_type, check_remote):
                try:
                    tantalus_api.check_file(file_instance)
                except DataError:
                    logging.exception('check file failed')
                    remote_file_size_check = False

            # Skip this dataset if any files failed
            if not remote_file_size_check:
                logging.warning(
                    "skipping dataset {} that failed check on {}".format(
                        dataset['id'], check_remote))
                continue

        # Check consistency with the storage being cleaned up
        file_size_check = True
        for file_instance in tantalus_api.get_dataset_file_instances(
                dataset['id'], dataset_type, storage_name):
            try:
                tantalus_api.check_file(file_instance)
            except DataError:
                logging.exception('check file failed')
                file_size_check = False

        # Skip this dataset if any files failed
        if not file_size_check:
            logging.warning(
                "skipping dataset {} that failed check on {}".format(
                    dataset['id'], storage_name))
            continue

        # Delete all files for this dataset
        for file_instance in tantalus_api.get_dataset_file_instances(
                dataset['id'], dataset_type, storage_name):
            if dry_run:
                logging.info(
                    "would delete file instance with id {}, filepath {}".format(
                        file_instance['id'], file_instance['filepath']))
            else:
                logging.info(
                    "deleting file instance with id {}, filepath {}".format(
                        file_instance['id'], file_instance['filepath']))
                tantalus_api.update(
                    "file_instance", id=file_instance['id'], is_deleted=True)

            total_data_size += file_instance['file_resource']['size']
            file_num_count += 1

    logging.info("deleted a total of {} files with size {} bytes".format(
        file_num_count, total_data_size))
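
# Hypothetical command-line entry point for main(), sketched with argparse; the
# real script may wire these arguments differently (e.g. with click), so treat
# the flag names below as illustrative assumptions rather than the actual
# interface.
if __name__ == '__main__':
    import argparse

    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser(
        description='delete local file instances for datasets verified against a remote storage')
    parser.add_argument('storage_name')
    parser.add_argument('dataset_type', choices=['sequencedataset', 'resultsdataset'])
    parser.add_argument('--dataset_id', type=int)
    parser.add_argument('--tag_name')
    parser.add_argument('--check_remote')
    parser.add_argument('--dry_run', action='store_true')
    args = parser.parse_args()

    main(
        args.storage_name,
        args.dataset_type,
        dataset_id=args.dataset_id,
        tag_name=args.tag_name,
        check_remote=args.check_remote,
        dry_run=args.dry_run,
    )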