Exemplo n.º 1
0
def load_variants(
    adapter,
    vcf_obj,
    case_obj,
    skip_case_id=False,
    gq_treshold=None,
    max_window=3000,
    variant_type="snv",
    genome_build=None,
):
    """Load variants for a family into the database.

    Every item of ``vcf_obj`` is converted with ``build_variant``. Structural
    variants are inserted one by one with ``adapter.add_structural_variant``
    (falsy build results are skipped); snvs are handed to
    ``adapter.add_variants`` as one lazy stream.

    Args:
        adapter (loqusdb.plugins.Adapter): initialized plugin
        vcf_obj (iterable): the variants to load
        case_obj (Case): dict with case information. "case_id" is always read;
                         "nr_variants" is read for snvs and "nr_sv_variants"
                         otherwise, to size the progress bar
        skip_case_id (bool): whether to include the case id on variant level
                             or not
        gq_treshold (int): passed on to build_variant
        max_window (int): Specify the max size for sv windows
        variant_type (str): 'sv' or 'snv'
        genome_build (str): passed on to build_variant

    Returns:
        nr_inserted (int): for 'sv' the number of variants added here, for
                           'snv' whatever adapter.add_variants returns
    """
    if variant_type == "snv":
        nr_variants = case_obj["nr_variants"]
    else:
        nr_variants = case_obj["nr_sv_variants"]

    nr_inserted = 0
    case_id = case_obj["case_id"]
    if skip_case_id:
        case_id = None

    # Loop over the variants in the vcf
    with click.progressbar(vcf_obj,
                           label="Inserting variants",
                           length=nr_variants) as bar:

        # This generator is lazy: nothing is read from the bar until it is
        # consumed. The consumers therefore have to run inside the ``with``
        # block, otherwise the bar is already finished when iteration starts.
        variants = (build_variant(variant,
                                  case_obj,
                                  case_id,
                                  gq_treshold,
                                  genome_build=genome_build)
                    for variant in bar)

        if variant_type == "sv":
            for sv_variant in variants:
                # build_variant can return a falsy value; those are skipped
                if not sv_variant:
                    continue
                adapter.add_structural_variant(variant=sv_variant,
                                               max_window=max_window)
                nr_inserted += 1

        if variant_type == "snv":
            nr_inserted = adapter.add_variants(variants)

    LOG.info("Inserted %s variants of type %s", nr_inserted, variant_type)

    return nr_inserted
def test_format_indel(del_variant, case_obj):
    """A deletion is built into a complete structural variant object."""
    ## GIVEN a SV deletion and the id of its case
    family_id = case_obj["case_id"]

    ## WHEN building the variant
    built = build_variant(variant=del_variant,
                          case_obj=case_obj,
                          case_id=family_id)
    id_parts = [
        del_variant.CHROM,
        str(del_variant.POS),
        del_variant.REF,
        del_variant.ALT[0],
    ]
    wanted_id = "_".join(id_parts)

    ## THEN identity and coordinates mirror the input record
    assert built
    assert built["variant_id"] == wanted_id
    assert built["chrom"] == del_variant.CHROM
    assert built["end_chrom"] == del_variant.CHROM
    assert built["pos"] == del_variant.POS
    assert built["end"] == del_variant.INFO["END"]
    assert built["sv_len"] == abs(del_variant.INFO["SVLEN"])

    ## AND alleles, type, case and genotype counts are as expected
    assert built["ref"] == del_variant.REF
    assert built["alt"] == del_variant.ALT[0]
    assert built["sv_type"] == "DEL"
    assert built["case_id"] == family_id
    assert built["homozygote"] == 0
    assert built["hemizygote"] == 0
Exemplo n.º 3
0
def variant_obj(request, het_variant, ind_positions, individuals):
    """Build ``het_variant`` for case id "test" without a GQ threshold.

    ``request`` is accepted but not used.
    """
    build_kwargs = {
        "variant": het_variant,
        "individuals": individuals,
        "ind_positions": ind_positions,
        "case_id": "test",
        "gq_treshold": None,
    }
    return build_variant(**build_kwargs)
Exemplo n.º 4
0
def test_format_variant_no_gq(variant_no_gq, case_obj):
    """With a GQ threshold set, a variant lacking GQ builds to None."""
    ## GIVEN a variant without GQ and the id of its case
    family_id = case_obj["case_id"]

    ## WHEN building the variant with a GQ threshold of 20
    result = build_variant(variant=variant_no_gq,
                           case_obj=case_obj,
                           case_id=family_id,
                           gq_treshold=20)

    ## THEN nothing is returned since the requirements are not fulfilled
    assert result is None
Exemplo n.º 5
0
def test_format_variant_chr_prefix(variant_chr, case_obj):
    """The 'chr' prefix is dropped from the chromosome name when building."""
    ## GIVEN a variant whose chromosome name starts with 'chr'
    raw_chrom = variant_chr.CHROM
    assert raw_chrom.startswith("chr")
    family_id = case_obj["case_id"]

    ## WHEN building the variant with a GQ threshold of 20
    built = build_variant(variant=variant_chr,
                          case_obj=case_obj,
                          case_id=family_id,
                          gq_treshold=20)

    ## THEN the stored chromosome is the name without its first three characters
    assert built["chrom"] == variant_chr.CHROM[3:]
Exemplo n.º 6
0
def test_format_variant_no_family_id(het_variant, case_obj):
    """Building with ``case_id=None`` leaves the case id off the variant."""
    ## GIVEN a parsed variant
    variant = het_variant
    ## WHEN parsing the variant telling that 'case_id' is None
    formated_variant = build_variant(variant=variant,
                                     case_obj=case_obj,
                                     case_id=None)
    ## THEN assert that case_id was not added
    # Identity check: comparisons to None should not go through __eq__
    assert formated_variant.get("case_id") is None
    assert formated_variant["homozygote"] == 0
    assert formated_variant["hemizygote"] == 0
Exemplo n.º 7
0
def test_format_hemizygote_variant(hem_variant, case_obj):
    """A hemizygous variant is counted as hemizygote only."""
    ## GIVEN a hemizygous variant and the id of its case
    family_id = case_obj["case_id"]

    ## WHEN building the variant
    built = build_variant(variant=hem_variant,
                          case_obj=case_obj,
                          case_id=family_id)

    ## THEN the hemizygote count is 1 and the homozygote count is 0
    assert built["homozygote"] == 0
    assert built["hemizygote"] == 1
Exemplo n.º 8
0
def test_format_homozygote_variant(hom_variant, case_obj):
    """A homozygous variant is counted as homozygote only."""
    ## GIVEN a homozygous variant and the id of its case
    family_id = case_obj["case_id"]

    ## WHEN building the variant
    built = build_variant(variant=hom_variant,
                          case_obj=case_obj,
                          case_id=family_id)

    ## THEN the homozygote count is 1 and the hemizygote count is 0
    assert built["homozygote"] == 1
    assert built["hemizygote"] == 0
Exemplo n.º 9
0
def test_get_insertion(small_insert_variant, mongo_adapter, case_obj):
    """A stored small insertion can be read back from the SV collection."""
    adapter = mongo_adapter
    ## GIVEN a mongo adapter with a small insertion
    variant = small_insert_variant
    case_id = case_obj["case_id"]
    formated_variant = build_variant(variant=variant,
                                     case_obj=case_obj,
                                     case_id=case_id)

    adapter.add_case(case_obj)
    adapter.add_structural_variant(formated_variant)

    ## WHEN fetching all structural variants
    stored_variants = list(adapter.db.structural_variant.find())

    ## THEN at least one document exists and every document is non-empty
    # Without the emptiness check the loop below would pass vacuously if
    # nothing had been stored at all.
    assert stored_variants
    for variant_obj in stored_variants:
        assert variant_obj
Exemplo n.º 10
0
def test_get_translocation(translocation_variant, mongo_adapter, case_obj):
    """A stored translocation can be read back from the SV collection."""
    adapter = mongo_adapter
    ## GIVEN a mongo adapter with a translocation
    variant = translocation_variant
    case_id = case_obj['case_id']
    formated_variant = build_variant(variant=variant,
                                     case_obj=case_obj,
                                     case_id=case_id)

    adapter.add_case(case_obj)
    adapter.add_structural_variant(formated_variant)

    ## WHEN fetching all structural variants
    stored_variants = list(adapter.db.structural_variant.find())

    ## THEN at least one document exists and every document is non-empty
    # Without the emptiness check the loop below would pass vacuously if
    # nothing had been stored at all.
    assert stored_variants
    for variant_obj in stored_variants:
        assert variant_obj
Exemplo n.º 11
0
def test_format_variant_no_call(variant_no_call, case_obj):
    """A variant that no individual carries builds to None."""
    ## GIVEN a variant where every genotype is either missing or hom ref
    family_id = case_obj["case_id"]

    for gt_type in variant_no_call.gt_types:
        genotype = GENOTYPE_MAP[gt_type]
        assert genotype in ["no_call", "hom_ref"]

    ## WHEN building the variant
    result = build_variant(variant=variant_no_call,
                           case_obj=case_obj,
                           case_id=family_id)

    ## THEN the result is None
    assert result is None
Exemplo n.º 12
0
def delete_structural_variants(adapter, vcf_obj, case_obj, case_id=None):
    """Delete structural variants for a case in the database

    Each item of ``vcf_obj`` is converted with ``build_variant`` and, unless
    the result is falsy, handed to ``adapter.delete_structural_variant``.
    Progress is logged per chromosome.

    Args:
        adapter(loqusdb.plugins.Adapter)
        vcf_obj(iterable): the variants to delete
        case_obj(dict): case information; "case_id" is read from it when no
                        case_id is given
        case_id(str): overrides the case id found in case_obj

    Returns:
        nr_deleted (int): Number of variants passed to the adapter for deletion
    """

    case_id = case_id or case_obj["case_id"]
    nr_deleted = 0
    chrom_time = datetime.now()
    current_chrom = None

    for variant in vcf_obj:
        formated_variant = build_variant(
            variant=variant,
            case_obj=case_obj,
            case_id=case_id,
        )

        # build_variant can return a falsy value; there is nothing to delete
        if not formated_variant:
            continue

        new_chrom = formated_variant.get("chrom")
        adapter.delete_structural_variant(formated_variant)
        nr_deleted += 1

        if not current_chrom:
            LOG.info("Start deleting chromosome {}".format(new_chrom))
            current_chrom = new_chrom
            chrom_time = datetime.now()
            continue

        if new_chrom != current_chrom:
            LOG.info("Chromosome {0} done".format(current_chrom))
            LOG.info("Time to delete chromosome {0}: {1}".format(
                current_chrom,
                datetime.now() - chrom_time))
            LOG.info("Start deleting chromosome {0}".format(new_chrom))
            current_chrom = new_chrom
            # Restart the timer so the next report covers only the new
            # chromosome instead of everything since the first one.
            chrom_time = datetime.now()

    return nr_deleted
Exemplo n.º 13
0
def test_load_translocation(translocation_variant, case_obj, mongo_adapter):
    """Loading a translocation records the case among the cluster families."""
    ## GIVEN a mongo adapter holding the case
    family_id = case_obj["case_id"]
    mongo_adapter.add_case(case_obj)

    ## WHEN building and loading the translocation
    built = build_variant(variant=translocation_variant,
                          case_obj=case_obj,
                          case_id=family_id)
    mongo_adapter.add_structural_variant(built)

    ## THEN the stored cluster lists exactly this case
    cluster = mongo_adapter.db.structural_variant.find_one()

    assert cluster["families"] == [family_id]
Exemplo n.º 14
0
def test_load_insertion(small_insert_variant, mongo_adapter, case_obj):
    """Loading a small insertion records the case among the cluster families."""
    ## GIVEN a mongo adapter holding the case
    family_id = case_obj['case_id']
    mongo_adapter.add_case(case_obj)

    ## WHEN building and loading the small insertion
    built = build_variant(variant=small_insert_variant,
                          case_obj=case_obj,
                          case_id=family_id)
    mongo_adapter.add_structural_variant(built)

    ## THEN the stored cluster lists exactly this case
    cluster = mongo_adapter.db.structural_variant.find_one()

    assert cluster['families'] == [family_id]
def test_format_translocation(translocation_variant, case_obj):
    """A translocation is built as a BND with an infinite length."""
    ## GIVEN a translocation and the id of its case
    family_id = case_obj["case_id"]

    ## WHEN building the variant
    built = build_variant(variant=translocation_variant,
                          case_obj=case_obj,
                          case_id=family_id)

    ## THEN the start comes from the record and the end from the mate position
    assert built["chrom"] == translocation_variant.CHROM
    assert built["end_chrom"] == "11"
    assert built["pos"] == translocation_variant.POS
    assert built["end"] == 119123896
    assert built["sv_len"] == float("inf")

    ## AND alleles and type are kept
    assert built["ref"] == translocation_variant.REF
    assert built["alt"] == translocation_variant.ALT[0]
    assert built["sv_type"] == "BND"
def test_format_dup_tandem(duptandem_variant, case_obj):
    """A tandem duplication is built as a DUP on a single chromosome."""
    ## GIVEN a tandem duplication and the id of its case
    family_id = case_obj["case_id"]

    ## WHEN building the variant
    built = build_variant(variant=duptandem_variant,
                          case_obj=case_obj,
                          case_id=family_id)

    ## THEN coordinates and length mirror the input record
    assert built["chrom"] == duptandem_variant.CHROM
    assert built["end_chrom"] == duptandem_variant.CHROM
    assert built["pos"] == duptandem_variant.POS
    assert built["end"] == duptandem_variant.INFO["END"]
    assert built["sv_len"] == abs(duptandem_variant.INFO["SVLEN"])

    ## AND alleles and type are kept
    assert built["ref"] == duptandem_variant.REF
    assert built["alt"] == duptandem_variant.ALT[0]
    assert built["sv_type"] == "DUP"
    def test_remove_one_SV(self, mongo_adapter, del_variant, case_obj):
        """Deleting the only SV removes both its SV and identity documents."""
        # GIVEN a database populated with one SV
        db = mongo_adapter.db
        formated_variant = build_variant(del_variant,
                                         case_obj=case_obj,
                                         case_id=case_obj["case_id"])
        mongo_adapter.add_structural_variant(formated_variant)
        mongo_SV = db.structural_variant.find_one()
        mongo_identity = db.identity.find_one()
        assert mongo_SV is not None
        assert mongo_identity is not None
        # WHEN deleting SV
        mongo_adapter.delete_structural_variant(formated_variant)

        # THEN there should be no remaining SVs in the database
        mongo_SV = db.structural_variant.find_one()
        # Query the same ``identity`` collection that was checked before the
        # delete; a misspelled attribute would read a different collection
        # and make this assertion pass no matter what was deleted.
        mongo_identity = db.identity.find_one()
        assert mongo_SV is None
        assert mongo_identity is None
Exemplo n.º 18
0
def test_load_same_insertion_twice(small_insert_variant, mongo_adapter,
                                   case_obj):
    """The same insertion loaded for two cases ends up in one shared cluster."""
    ## GIVEN a mongo adapter holding the case
    family_id = case_obj["case_id"]
    mongo_adapter.add_case(case_obj)

    ## WHEN loading the insertion, then loading it again under case id "2"
    built = build_variant(variant=small_insert_variant,
                          case_obj=case_obj,
                          case_id=family_id)
    mongo_adapter.add_structural_variant(built)
    built["case_id"] = "2"
    mongo_adapter.add_structural_variant(built)

    ## THEN the stored cluster lists both cases
    cluster = mongo_adapter.db.structural_variant.find_one()
    expected_families = {family_id, "2"}

    assert set(cluster["families"]) == expected_families
Exemplo n.º 19
0
def test_load_same_translocation_twice(translocation_variant, case_obj,
                                       mongo_adapter):
    """The same translocation loaded for two cases ends up in one shared cluster."""
    ## GIVEN a mongo adapter holding the case
    family_id = case_obj['case_id']
    mongo_adapter.add_case(case_obj)

    ## WHEN loading the translocation, then loading it again under case id '2'
    built = build_variant(variant=translocation_variant,
                          case_obj=case_obj,
                          case_id=family_id)
    mongo_adapter.add_structural_variant(built)

    built['case_id'] = '2'
    mongo_adapter.add_structural_variant(built)

    ## THEN the stored cluster lists both cases
    cluster = mongo_adapter.db.structural_variant.find_one()
    expected_families = {family_id, '2'}

    assert set(cluster['families']) == expected_families
Exemplo n.º 20
0
def test_format_variant(het_variant, case_obj):
    """A heterozygous variant is built with the expected id and fields."""
    ## GIVEN a heterozygous variant and the id of its case
    family_id = case_obj['case_id']

    ## WHEN building the variant
    built = build_variant(variant=het_variant,
                          case_obj=case_obj,
                          case_id=family_id)

    id_parts = [
        het_variant.CHROM,
        str(het_variant.POS),
        het_variant.REF,
        het_variant.ALT[0],
    ]
    wanted_id = '_'.join(id_parts)

    ## THEN every field mirrors the input record
    assert built
    assert built['variant_id'] == wanted_id
    assert built['chrom'] == het_variant.CHROM
    assert built['pos'] == het_variant.POS
    assert built['ref'] == het_variant.REF
    assert built['alt'] == het_variant.ALT[0]
    assert built['case_id'] == family_id
    assert built['homozygote'] == 0
Exemplo n.º 21
0
def test_format_variant(het_variant, case_obj):
    """Building a heterozygous variant keeps position, alleles and case id."""
    ## GIVEN a heterozygous variant and the id of the case it belongs to
    owner_case = case_obj["case_id"]

    ## WHEN the variant is built
    result = build_variant(variant=het_variant,
                           case_obj=case_obj,
                           case_id=owner_case)

    first_alt = het_variant.ALT[0]
    wanted_id = "_".join(
        [het_variant.CHROM, str(het_variant.POS), het_variant.REF, first_alt])

    ## THEN the built object matches the source record field by field
    assert result
    assert result["variant_id"] == wanted_id
    assert result["chrom"] == het_variant.CHROM
    assert result["pos"] == het_variant.POS
    assert result["ref"] == het_variant.REF
    assert result["alt"] == het_variant.ALT[0]
    assert result["case_id"] == owner_case
    assert result["homozygote"] == 0
    def test_remove_one_of_two_SV(self, mongo_adapter, duptandem_variant,
                                  case_obj):
        """Deleting one of two clustered SVs leaves the other one in place."""
        # GIVEN a database populated with one SV
        db = mongo_adapter.db
        first_variant = build_variant(duptandem_variant,
                                      case_obj=case_obj,
                                      case_id=case_obj["case_id"])
        mongo_adapter.add_structural_variant(first_variant)

        # AND a near-identical SV from a second case, shifted slightly at
        # both ends
        second_variant = copy.deepcopy(first_variant)
        second_variant["pos"] += 2
        second_variant["end"] -= 1
        second_variant["case_id"] = "case_2"
        mongo_adapter.add_structural_variant(second_variant)

        # Both variants should share a single structural variant document
        sv_docs = list(db.structural_variant.find())
        assert len(sv_docs) == 1
        combined_pos = first_variant["pos"] + second_variant["pos"]
        assert sv_docs[0]["pos_sum"] == combined_pos
        # while each one has its own identity document
        identity_docs = list(db.identity.find())
        assert len(identity_docs) == 2

        # WHEN deleting the variant from the first case
        mongo_adapter.delete_structural_variant(first_variant)

        # THEN the remaining SV document only sums the position of the SV
        # from the second case
        sv_docs = list(db.structural_variant.find())
        assert len(sv_docs) == 1
        assert sv_docs[0]["pos_sum"] == second_variant["pos"]
        # And a single identity document is left
        identity_docs = list(db.identity.find())
        assert len(identity_docs) == 1
Exemplo n.º 23
0
def test_build_het_variant(het_variant, case_obj):
    """A heterozygous variant counts as neither homozygote nor hemizygote."""
    built = build_variant(variant=het_variant, case_obj=case_obj)

    assert built["chrom"] == het_variant.CHROM
    assert built["homozygote"] == 0
    assert built["hemizygote"] == 0
Exemplo n.º 24
0
def delete_variants(adapter,
                    vcf_obj,
                    case_obj,
                    case_id=None,
                    genome_build=None):
    """Delete variants for a case in the database

    Each item of ``vcf_obj`` is converted with ``build_variant``; non-falsy
    results are collected and handed to ``adapter.delete_variants`` in
    batches of 10000, with a final call for whatever remains. Progress is
    logged per chromosome.

    Args:
        adapter(loqusdb.plugins.Adapter)
        vcf_obj(iterable): the variants to delete
        case_obj(dict): case information; "case_id" is read from it when no
                        case_id is given
        case_id(str): overrides the case id found in case_obj
        genome_build(str): passed on to build_variant

    Returns:
        nr_deleted (int): Number of variants queued for deletion
    """
    case_id = case_id or case_obj["case_id"]
    nr_deleted = 0
    chrom_time = datetime.now()
    current_chrom = None

    variant_list = []
    for variant in vcf_obj:
        formated_variant = build_variant(variant=variant,
                                         case_obj=case_obj,
                                         case_id=case_id,
                                         genome_build=genome_build)

        # build_variant can return a falsy value; there is nothing to delete
        if not formated_variant:
            continue

        variant_list.append(formated_variant)

        new_chrom = formated_variant.get("chrom")
        # When there are enough variants in the variant list
        # They are passed to delete_variants
        if len(variant_list) == 10000:
            adapter.delete_variants(variant_list)
            variant_list.clear()

        nr_deleted += 1

        if not current_chrom:
            LOG.info("Start deleting chromosome {}".format(new_chrom))
            current_chrom = new_chrom
            chrom_time = datetime.now()
            continue

        if new_chrom != current_chrom:
            LOG.info("Chromosome {0} done".format(current_chrom))
            LOG.info("Time to delete chromosome {0}: {1}".format(
                current_chrom,
                datetime.now() - chrom_time))
            LOG.info("Start deleting chromosome {0}".format(new_chrom))
            current_chrom = new_chrom
            # Restart the timer so the next report covers only the new
            # chromosome instead of everything since the first one.
            chrom_time = datetime.now()

    # Flush the last, partially filled batch
    if variant_list:
        adapter.delete_variants(variant_list)
        variant_list.clear()

    return nr_deleted