def test_sv_end_BND():
    """Check the end coordinate computed by sv_end for a BND structural variant.

    Neither svend nor svlen is supplied, so only pos and the ALT value are
    available to sv_end.

    Example VCF record:
    2 321681 bnd_W G G]17:198982] 6 PASS SVTYPE=BND;MATEID=bnd_Y GT 0/1
    """
    expected_end = 198981
    # NOTE(review): ALT is not defined inside this test; presumably a
    # module-level value holding the breakend ALT of the example record
    # ("G]17:198982]") -- confirm against the rest of the file.
    observed_end = sv_end(pos=321681, alt=ALT, svend=None, svlen=None)
    assert observed_end == expected_end
def test_sv_end_SVEND():
    """Check the end coordinate computed by sv_end when SVEND is provided.

    The record carries both END (321887) and SVLEN (-205); the expected
    result is 321886, i.e. END minus one.

    Example VCF record:
    2 321682 . T <DEL> 6 PASS SVTYPE=DEL;END=321887;SVLEN=-205;CIPOS=-56,20;CIEND=-10,62 GT:GQ 0/1:12
    """
    expected_end = 321886
    observed_end = sv_end(pos=321682, alt="<DEL>", svend=321887, svlen=-205)
    assert observed_end == expected_end
def add_variants(database, vcf_obj, samples, assembly, dataset_id, nr_variants):
    """Build variant objects from a cyvcf2 VCF iterator and load them into the database

    Variants on chromosomes outside CHROMOSOMES, and variants that were not
    called in any of the requested samples, are skipped.

    Accepts:
        database(pymongo.database.Database)
        vcf_obj(cyvcf2.VCF): a VCF object
        samples(set): set of samples to add variants for
        assembly(str): chromosome build
        dataset_id(str): dataset id
        nr_variants(int): number of variants contained in VCF file,
            used as the maximum of the progress bar

    Returns:
        inserted_vars(int): number of variants for which add_variant
            returned a non-None result
    """
    LOG.info("Parsing variants..\n")

    vcf_samples = vcf_obj.samples
    # Collect position to check genotypes for (only samples provided by user)
    gt_positions = [
        position for position, sample in enumerate(vcf_samples) if sample in samples
    ]

    inserted_vars = 0
    with Bar("Processing", max=nr_variants) as bar:
        for vcf_variant in vcf_obj:
            # Advance once per VCF record, including records skipped below,
            # so the bar can actually reach max=nr_variants.
            bar.next()

            chrom = vcf_variant.CHROM.replace("chr", "")
            if chrom not in CHROMOSOMES:
                LOG.warning(
                    f"chromosome '{vcf_variant.CHROM}' not included in canonical chromosome list, skipping it."
                )
                continue

            # Check if variant was called in provided samples
            sample_calls = variant_called(
                vcf_samples, gt_positions, vcf_variant.gt_types
            )
            if not sample_calls:
                continue  # variant was not called in samples of interest

            parsed_variant = _parse_vcf_variant(vcf_variant, chrom)
            dataset_dict = {dataset_id: {"samples": sample_calls}}

            # Create standard variant object with specific _id
            variant = Variant(parsed_variant, dataset_dict, assembly)

            # Load variant into database or update an existing one with new samples and dataset
            result = add_variant(
                database=database, variant=variant, dataset_id=dataset_id
            )
            if result is not None:
                inserted_vars += 1

    return inserted_vars


def _parse_vcf_variant(vcf_variant, chrom):
    """Collect the fields used to build a Variant from one cyvcf2 record

    Accepts:
        vcf_variant(cyvcf2.Variant): a record yielded by the VCF iterator
        chrom(str): chromosome name with any "chr" already stripped

    Returns:
        parsed_variant(dict): chromosome, start, end, reference_bases,
            alternate_bases and variant_type; structural variants get their
            end recomputed by sv_end, and BND variants also get mate_name
    """
    parsed_variant = dict(
        chromosome=chrom,
        start=vcf_variant.start,  # 0-based coordinate
        end=vcf_variant.end,  # 0-based coordinate
        reference_bases=vcf_variant.REF,
        alternate_bases=vcf_variant.ALT,
    )

    if vcf_variant.var_type != "sv":
        parsed_variant["variant_type"] = vcf_variant.var_type.upper()
        return parsed_variant

    sv_type = vcf_variant.INFO["SVTYPE"]
    parsed_variant["variant_type"] = sv_type
    alt = vcf_variant.ALT[0]

    # Check if a better variant end can be extracted from INFO field
    parsed_variant["end"] = sv_end(
        pos=vcf_variant.POS,
        alt=alt,
        svend=vcf_variant.INFO.get("END"),
        svlen=vcf_variant.INFO.get("SVLEN"),
    )

    if sv_type == "BND":
        parsed_variant["mate_name"] = bnd_mate_name(alt, chrom)

    return parsed_variant