def variants(ds, vcf, sample, panel):
    """Add variants from a VCF file to a dataset.

    Args:
        ds: id of the target dataset; it is looked up as ``_id`` in the
            ``dataset`` collection of ``current_app.db``.
        vcf: VCF file handed to ``get_vcf_samples`` and ``extract_variants``.
        sample: iterable of sample names; every one of them must be listed
            among the samples of the VCF file.
        panel: collection of panels passed to ``merge_intervals`` to build the
            interval filter. When empty, no interval filter is applied.

    Raises:
        click.Abort: if the dataset is not found, if any requested sample is
            missing from the VCF, if ``extract_variants`` returns ``None``, or
            if no variant is counted in the extracted VCF.
    """
    # Make sure the dataset id corresponds to a dataset in the database
    dataset = current_app.db["dataset"].find_one({"_id": ds})
    if dataset is None:
        click.echo(f"Couldn't find any dataset with id '{ds}' in the database")
        raise click.Abort()

    # Check that the required sample(s) are contained in the VCF
    vcf_samples = get_vcf_samples(vcf)
    if not all(samplen in vcf_samples for samplen in sample):
        click.echo(
            f"One or more provided sample was not found in the VCF file. Valid samples are: {','.join(vcf_samples)}"
        )
        raise click.Abort()

    custom_samples = set(sample)  # set of samples provided by users

    filter_intervals = None
    if panel:
        # Create BedTool panel with genomic intervals to filter VCF with
        filter_intervals = merge_intervals(list(panel))

    vcf_obj = extract_variants(vcf_file=vcf, samples=custom_samples, filter=filter_intervals)
    if vcf_obj is None:
        raise click.Abort()

    nr_variants = count_variants(vcf_obj)
    if nr_variants == 0:
        click.echo("Provided VCF file doesn't contain any variant")
        raise click.Abort()

    # NOTE(review): the VCF is extracted a second time, presumably because
    # count_variants consumes the object returned by extract_variants - confirm.
    vcf_obj = extract_variants(vcf_file=vcf, samples=custom_samples, filter=filter_intervals)

    # Add variants
    added = add_variants(
        database=current_app.db,
        vcf_obj=vcf_obj,
        samples=custom_samples,
        assembly=dataset["assembly_id"],
        dataset_id=ds,
        nr_variants=nr_variants,
    )
    click.echo(f"{added} variants loaded into the database")

    if added > 0:
        # Update dataset object accordingly
        update_dataset(database=current_app.db, dataset_id=ds, samples=custom_samples, add=True)
def test_merge_demo_intervals():
    """Merge the two demo panels with merge_intervals and check the resulting interval count"""

    # Load both demo panels and verify their expected sizes
    first_panel = pybedtools.BedTool(panel1_path)
    assert len(first_panel) == 4

    second_panel = pybedtools.BedTool(panel2_path)
    assert len(second_panel) == 3

    merged_panels = merge_intervals([first_panel, second_panel])

    # The two panels have a shared interval, so one interval less than the sum is expected
    expected_count = len(first_panel) + len(second_panel) - 1
    assert len(merged_panels) == expected_count
def test_merge_intervals():
    """Merge the pybedtools example files a.bed and b.bed with merge_intervals and check the interval count"""

    # Example file a.bed contains these features:
    #   chr1    1    100    feature1    0    +
    #   chr1    100  200    feature2    0    +
    #   chr1    150  500    feature3    0    -
    #   chr1    900  950    feature4    0    +
    bed_a = pybedtools.example_bedtool("a.bed")
    assert len(bed_a) == 4

    # Example file b.bed contains these features:
    #   chr1    155  200    feature5    0    -
    #   chr1    800  901    feature6    0    +
    bed_b = pybedtools.example_bedtool("b.bed")
    assert len(bed_b) == 2

    # Given the coordinates listed above, the features are expected to collapse
    # into two merged intervals
    combined = merge_intervals([bed_a, bed_b])
    assert len(combined) == 2