import logging

import pysam

# Assumed package layout for the Tantalus client: NotFoundError and
# TantalusApi ship with the dbclients package used throughout this repo.
from dbclients.basicclient import NotFoundError
from dbclients.tantalus import TantalusApi

# get_bam_header_info, get_bam_ref_genome, get_bam_aligner_name,
# add_sequence_dataset and the transfer_files module are assumed to be
# defined elsewhere in this package.


def import_bam(storage_name,
               bam_file_path,
               sample=None,
               library=None,
               lane_infos=None,
               read_type=None,
               ref_genome=None,
               tag_name=None,
               update=False):
    """
    Imports bam into tantalus

    Args:
        storage_name: (string) name of destination storage
        bam_file_path: (string) filepath to bam on destination storage
        sample: (dict) contains sample_id
        library: (dict) contains library_id, library_type, index_format
        lane_infos: (list of dict) each contains flowcell_id, lane_number,
            adapter_index_sequence, sequencing_centre, read_type,
            reference_genome, aligner
        read_type: (string) read type for the run
        ref_genome: (string) reference genome; inferred from the bam header
            if not given
        tag_name: (string) tag to apply to the created dataset
        update: (boolean) update existing file resources if True

    Returns:
        sequence_dataset: (dict) sequence dataset created on tantalus
    """
    tantalus_api = TantalusApi()

    # Get a url allowing access regardless of whether the file
    # is in cloud or local storage
    storage_client = tantalus_api.get_storage_client(storage_name)
    bam_filename = tantalus_api.get_file_resource_filename(storage_name, bam_file_path)
    bam_url = storage_client.get_url(bam_filename)

    bam_header = pysam.AlignmentFile(bam_url).header
    bam_header_info = get_bam_header_info(bam_header)

    if ref_genome is None:
        ref_genome = get_bam_ref_genome(bam_header)

    aligner_name = get_bam_aligner_name(bam_header)

    logging.info(
        f"bam header shows reference genome {ref_genome} and aligner {aligner_name}"
    )

    bai_file_path = None
    if storage_client.exists(bam_filename + ".bai"):
        bai_file_path = bam_file_path + ".bai"
    else:
        logging.info(f"no bam index found at {bam_filename + '.bai'}")

    # If no sample was specified, assume it exists in tantalus and
    # search for it based on header info
    if sample is None:
        if len(bam_header_info["sample_ids"]) != 1:
            raise ValueError(
                f"found sample_ids={bam_header_info['sample_ids']}, please specify an override sample id"
            )
        sample_id = list(bam_header_info["sample_ids"])[0]
        sample = tantalus_api.get('sample', sample_id=sample_id)

    # If no library was specified, assume it exists in tantalus and
    # search for it based on header info
    if library is None:
        if len(bam_header_info["library_ids"]) != 1:
            raise ValueError(
                f"found library_ids={bam_header_info['library_ids']}, please specify an override library id"
            )
        library_id = list(bam_header_info["library_ids"])[0]
        library = tantalus_api.get('dna_library', library_id=library_id)

    # Default to paired end reads
    if read_type is None:
        read_type = 'P'

    # If no lane infos were specified, create them from header info
    if lane_infos is None:
        lane_infos = []
        for lane in bam_header_info["sequence_lanes"]:
            lane_info = {
                "flowcell_id": lane["flowcell_id"],
                "lane_number": lane["lane_number"],
                "library_id": lane["library_id"],
                "sequencing_centre": lane["sequencing_centre"],
                "read_type": read_type,
            }
            lane_infos.append(lane_info)

    # Add the sequence dataset to Tantalus
    sequence_dataset = add_sequence_dataset(
        tantalus_api,
        storage_name=storage_name,
        sample=sample,
        library=library,
        dataset_type="BAM",
        sequence_lanes=lane_infos,
        bam_file_path=bam_file_path,
        reference_genome=ref_genome,
        aligner=aligner_name,
        bai_file_path=bai_file_path,
        tag_name=tag_name,
        update=update,
    )

    return sequence_dataset
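# Usage sketch for import_bam (a minimal example; the storage name, bam
# path, and tag below are hypothetical, not from a real deployment):
#
#   dataset = import_bam(
#       storage_name="singlecellblob",           # hypothetical storage
#       bam_file_path="/data/SA123/sample.bam",  # hypothetical path
#       read_type="P",                           # paired end
#       tag_name="bam_import_2019",              # hypothetical tag
#   )
#
# Sample, library, and lane info are inferred from the bam header when not
# passed explicitly; pass sample/library dicts to override the header values.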
def add_generic_results(filepaths,
                        storage_name,
                        results_name,
                        results_type,
                        results_version,
                        sample_ids=(),
                        library_ids=(),
                        analysis_pk=None,
                        recursive=False,
                        tag_name=None,
                        update=False,
                        remote_storage_name=None):
    """
    Creates a results dataset in tantalus for a set of files

    Args:
        filepaths: (list) filepaths of the results on the source storage
        storage_name: (string) name of the storage containing the files
        results_name: (string) unique name of the results dataset
        results_type: (string) type of the results
        results_version: (string) version of the results
        sample_ids: (iterable) sample ids associated with the results
        library_ids: (iterable) library ids associated with the results
        analysis_pk: (int) primary key of the analysis that produced the results
        recursive: (boolean) treat each filepath as a directory prefix and
            add every file under it
        tag_name: (string) tag to apply to the created dataset
        update: (boolean) update existing file resources and dataset if True
        remote_storage_name: (string) if given, transfer the dataset to this storage

    Returns:
        results_dataset: (dict) results dataset created on tantalus
    """
    tantalus_api = TantalusApi()

    storage_client = tantalus_api.get_storage_client(storage_name)

    sample_pks = []
    for sample_id in sample_ids:
        sample = tantalus_api.get(
            "sample",
            sample_id=sample_id,
        )
        sample_pks.append(sample['id'])

    library_pks = []
    for library_id in library_ids:
        library = tantalus_api.get(
            "dna_library",
            library_id=library_id,
        )
        library_pks.append(library['id'])

    # Add the file resources to tantalus
    file_resource_pks = []
    for filepath in filepaths:
        if recursive:
            logging.info("Recursing directory {}".format(filepath))
            filename_prefix = tantalus_api.get_file_resource_filename(storage_name, filepath)
            add_filepaths = []
            for filename in storage_client.list(filename_prefix):
                add_filepaths.append(tantalus_api.get_filepath(storage_name, filename))
        else:
            add_filepaths = [filepath]

        for add_filepath in add_filepaths:
            logging.info("Adding file resource for {} to Tantalus".format(add_filepath))
            resource, instance = tantalus_api.add_file(
                storage_name=storage_name,
                filepath=add_filepath,
                update=update,
            )
            file_resource_pks.append(resource["id"])

    results_dataset_fields = dict(
        name=results_name,
        results_type=results_type,
        results_version=results_version,
        analysis=analysis_pk,
        samples=sample_pks,
        libraries=library_pks,
        file_resources=file_resource_pks,
    )

    # Add the results dataset to tantalus, updating an existing dataset
    # of the same name if requested
    try:
        results_id = tantalus_api.get("results", name=results_dataset_fields["name"])["id"]
    except NotFoundError:
        results_id = None

    if update and results_id is not None:
        logging.warning("results dataset {} exists, updating".format(
            results_dataset_fields["name"]))
        results_dataset = tantalus_api.update("results", id=results_id, **results_dataset_fields)
    else:
        logging.info("creating results dataset {}".format(
            results_dataset_fields["name"]))
        results_dataset = tantalus_api.get_or_create("results", **results_dataset_fields)

    # Tag with the id of the created or updated dataset; results_id may be
    # None here if the dataset was newly created
    if tag_name is not None:
        tantalus_api.tag(tag_name, resultsdataset_set=[results_dataset["id"]])

    logging.info("Successfully created results dataset with ID {}".format(
        results_dataset["id"]))

    if remote_storage_name is not None:
        transfer_files.transfer_dataset(
            tantalus_api,
            results_dataset['id'],
            "resultsdataset",
            storage_name,
            remote_storage_name,
        )

    return results_dataset
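# Usage sketch for add_generic_results (all argument values below are
# hypothetical, chosen only to illustrate the call):
#
#   results = add_generic_results(
#       filepaths=["/results/align/"],  # hypothetical results directory
#       storage_name="shahlab",         # hypothetical storage
#       results_name="align_SA123",     # must be unique in tantalus
#       results_type="align",
#       results_version="v0.1.0",
#       sample_ids=["SA123"],
#       recursive=True,                 # add every file under the directory
#   )
#
# Passing remote_storage_name additionally transfers the new dataset to
# that storage after creation.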