示例#1
0
def test_sv_end_BND():
    """Check the end coordinate that sv_end computes for a BND structural variant.

    VCF record used as example:
    2	321681	bnd_W	G	G]17:198982]	6	PASS	SVTYPE=BND;MATEID=bnd_Y	GT	0/1
    """
    # One less than the position (198982) shown in the ALT field of the example record
    expected_end = 198981

    # NOTE(review): ALT is not defined inside this function; presumably a
    # module-level constant holding the ALT column of the record above - confirm.
    computed_end = sv_end(pos=321681, alt=ALT, svend=None, svlen=None)

    assert computed_end == expected_end
示例#2
0
def test_sv_end_SVEND():
    """Check the end coordinate that sv_end computes when an SV record provides END (svend).

    VCF record used as example:
    2 321682 . T <DEL> 6 PASS SVTYPE=DEL;END=321887;SVLEN=-205;CIPOS=-56,20;CIEND=-10,62 GT:GQ 0/1:12

    """
    # One less than the END value (321887) of the example record
    expected_end = 321886

    computed_end = sv_end(pos=321682, alt="<DEL>", svend=321887, svlen=-205)

    assert computed_end == expected_end
示例#3
0
def add_variants(database, vcf_obj, samples, assembly, dataset_id, nr_variants):
    """Build variant objects from a cyvcf2 VCF iterator and load them into the database

    Records whose chromosome (after removing "chr") is not in CHROMOSOMES, and
    records for which variant_called returns no calls for the requested samples,
    are skipped and not loaded.

    Accepts:
        database(pymongo.database.Database)
        vcf_obj(cyvcf2.VCF): a VCF object
        samples(set): set of samples to add variants for
        assembly(str): chromosome build
        dataset_id(str): dataset id
        nr_variants(int): number of variants contained in VCF file (progress bar maximum)
    Returns:
        inserted_vars(int): number of variants for which add_variant returned a non-None result

    """
    LOG.info("Parsing variants..\n")

    vcf_samples = vcf_obj.samples

    # Collect position to check genotypes for (only samples provided by user)
    gt_positions = [i for i, sample in enumerate(vcf_samples) if sample in samples]

    inserted_vars = 0
    with Bar("Processing", max=nr_variants) as bar:
        for vcf_variant in vcf_obj:
            # Advance once per record read, skipped records included, so the
            # bar can actually reach nr_variants
            bar.next()

            chrom = vcf_variant.CHROM.replace("chr", "")
            if chrom not in CHROMOSOMES:
                LOG.warning(
                    f"chromosome '{vcf_variant.CHROM}' not included in canonical chromosome list, skipping it."
                )
                continue

            # Check if variant was called in provided samples
            sample_calls = variant_called(
                vcf_samples, gt_positions, vcf_variant.gt_types
            )

            if not sample_calls:
                continue  # variant was not called in samples of interest

            parsed_variant = dict(
                chromosome=chrom,
                start=vcf_variant.start,  # 0-based coordinate
                end=vcf_variant.end,  # 0-based coordinate
                reference_bases=vcf_variant.REF,
                alternate_bases=vcf_variant.ALT,
            )

            if vcf_variant.var_type == "sv":
                sv_type = vcf_variant.INFO["SVTYPE"]
                parsed_variant["variant_type"] = sv_type

                alt = vcf_variant.ALT[0]

                # Check if a better variant end can be extracted from INFO field;
                # the value returned by sv_end replaces the end set above
                parsed_variant["end"] = sv_end(
                    pos=vcf_variant.POS,
                    alt=alt,
                    svend=vcf_variant.INFO.get("END"),
                    svlen=vcf_variant.INFO.get("SVLEN"),
                )

                if sv_type == "BND":
                    parsed_variant["mate_name"] = bnd_mate_name(alt, chrom)

            else:
                parsed_variant["variant_type"] = vcf_variant.var_type.upper()

            dataset_dict = {dataset_id: {"samples": sample_calls}}
            # Create standard variant object with specific _id
            variant = Variant(parsed_variant, dataset_dict, assembly)

            # Load variant into database or update an existing one with new samples and dataset
            result = add_variant(
                database=database, variant=variant, dataset_id=dataset_id
            )
            if result is not None:
                inserted_vars += 1

    return inserted_vars