Пример #1
0
def parse_smn_file(lines):
    """Parse a SMNCopyNumberCaller TSV file.

    The first non-blank line is read as the tab separated header and every
    following non-blank line as one sample row. Blank or whitespace-only
    lines (for example a trailing empty line) are skipped instead of being
    treated as a data row, which would raise a ``KeyError``.

    Args:
        lines(iterable(str)): the lines of the file, header first

    Returns:
        list(sma_info_per_individual(dict)): one dict per sample row with the
            keys sample_id, is_sma, is_sma_carrier, smn1_cn, smn2_cn,
            smn2delta78_cn and smn_27134_cn

    Raises:
        KeyError: if a sample row lacks one of the columns read below
    """
    individuals = []
    header = None

    for line in lines:
        line = line.rstrip()
        if not line:
            # Nothing to parse; an empty row would otherwise fail on lookup
            continue

        if header is None:
            # Header line
            header = line.split("\t")
            continue

        ind_info = dict(zip(header, line.split("\t")))
        individuals.append({
            "sample_id": ind_info["Sample"],
            "is_sma": make_bool(ind_info["isSMA"]),
            "is_sma_carrier": make_bool(ind_info["isCarrier"]),
            "smn1_cn": convert_number(ind_info["SMN1_CN"]),
            "smn2_cn": convert_number(ind_info["SMN2_CN"]),
            "smn2delta78_cn": convert_number(ind_info["SMN2delta7-8_CN"]),
            "smn_27134_cn": convert_number(ind_info["g.27134T>G_CN"]),
        })

    return individuals
Пример #2
0
def test_make_bool_YES():
    """The string "YES" should be converted to True."""
    ## GIVEN the affirmative string "YES"
    affirmative = "YES"
    ## WHEN converting to a boolean
    converted = make_bool(affirmative)
    ## THEN assert it is True
    assert converted is True
Пример #3
0
def test_make_bool_nonsense():
    """A string with no boolean meaning should be converted to False."""
    ## GIVEN a string that does not represent a boolean
    gibberish = "nonsense asdlkfjalk"
    ## WHEN converting to a boolean
    converted = make_bool(gibberish)
    ## THEN assert it is False
    assert converted is False
Пример #4
0
def test_make_bool_empty():
    """The empty string should be converted to False."""
    ## GIVEN an empty string
    empty = ""
    ## WHEN converting to a boolean
    converted = make_bool(empty)
    ## THEN assert it is False
    assert converted is False
Пример #5
0
def test_make_bool_zero():
    """The string "0" should be converted to False."""
    ## GIVEN the string "0"
    zero = "0"
    ## WHEN converting to a boolean
    converted = make_bool(zero)
    ## THEN assert it is False
    assert converted is False
Пример #6
0
def test_make_bool_one():
    """The string "1" should be converted to True."""
    ## GIVEN the string "1"
    one = "1"
    ## WHEN converting to a boolean
    converted = make_bool(one)
    ## THEN assert it is True
    assert converted is True
Пример #7
0
def parse_peddy_sex_check(lines):
    """Parse a peddy sex check (.sex_check.csv) file

    The first non-blank line is read as the comma separated header (leading
    "#" characters are stripped) and every following non-blank line as one
    individual. Blank or whitespace-only lines are skipped instead of being
    treated as a data row, which would raise a ``KeyError``. Columns that
    are not converted below are kept as strings.

    Args:
        lines(iterable(str))

    Returns:
        sex_check(list(dict)): one dict per individual, keyed by header name

    Raises:
        KeyError: if a row lacks one of the numeric columns
    """
    # Columns passed through convert_number; each must exist in the row
    number_columns = (
        # number of homozygous-alternate calls
        "hom_alt_count",
        # number of homozygous-reference calls
        "hom_ref_count",
        # number of heterozygote calls
        "het_count",
        # ratio of het_count / hom_alt_count. Low for males, high for females
        "het_ratio",
    )

    sex_check = []
    header = None
    for line in lines:
        line = line.rstrip()
        if not line:
            # Nothing to parse; an empty row would otherwise fail on lookup
            continue

        if header is None:
            # Header line
            header = line.lstrip("#").split(",")
            continue

        ind_info = dict(zip(header, line.split(",")))

        # boolean indicating whether there is a mismatch between X
        # genotypes and ped sex. A missing column is passed on as None.
        ind_info["error"] = make_bool(ind_info.get("error"))

        for column in number_columns:
            ind_info[column] = convert_number(ind_info[column])

        sex_check.append(ind_info)

    return sex_check
Пример #8
0
def parse_peddy_sex_check(lines):
    """Parse a peddy sex check (.sex_check.csv) file

    The first non-blank line is read as the comma separated header (leading
    '#' characters are stripped) and every following non-blank line as one
    individual. Blank or whitespace-only lines are skipped instead of being
    treated as a data row, which would raise a ``KeyError``. Columns that
    are not converted below are kept as strings.

    Args:
        lines(iterable(str))

    Returns:
        sex_check(list(dict)): one dict per individual, keyed by header name

    Raises:
        KeyError: if a row lacks one of the numeric columns
    """
    # Columns passed through convert_number; each must exist in the row
    number_columns = (
        # number of homozygous-alternate calls
        'hom_alt_count',
        # number of homozygous-reference calls
        'hom_ref_count',
        # number of heterozygote calls
        'het_count',
        # ratio of het_count / hom_alt_count. Low for males, high for females
        'het_ratio',
    )

    sex_check = []
    header = None
    for line in lines:
        line = line.rstrip()
        if not line:
            # Nothing to parse; an empty row would otherwise fail on lookup
            continue

        if header is None:
            # Header line
            header = line.lstrip('#').split(',')
            continue

        ind_info = dict(zip(header, line.split(',')))

        # boolean indicating whether there is a mismatch between X
        # genotypes and ped sex. A missing column is passed on as None.
        ind_info['error'] = make_bool(ind_info.get('error'))

        for column in number_columns:
            ind_info[column] = convert_number(ind_info[column])

        sex_check.append(ind_info)

    return sex_check
Пример #9
0
def parse_peddy_ped_check(lines):
    """Parse a peddy .ped_check.csv file

    The first non-blank line is read as the comma separated header (leading
    '#' characters are stripped) and every following non-blank line as one
    pair of samples. Blank or whitespace-only lines are skipped instead of
    being treated as a data row, which would raise a ``KeyError``. Columns
    that are not converted below are kept as strings.

    Args:
        lines(iterable(str))

    Returns:
        ped_check(list(dict)): one dict per sample pair, keyed by header name

    Raises:
        KeyError: if a row lacks one of the numeric columns
    """
    # Columns passed through convert_number; each must exist in the row
    number_columns = (
        # the number of sites at which sample_a was heterozygous
        'hets_a',
        # the number of sites at which sample_b was heterozygous
        'hets_b',
        # the number of sites at which the 2 samples shared no alleles
        # (should approach 0 for parent-child pairs).
        'ibs0',
        # the number of sites at which the 2 samples were both
        # hom-ref, both het, or both hom-alt.
        'ibs2',
        # the number of sites that was used to predict the relatedness.
        'n',
        # the relatedness calculated from the genotypes (per peddy's
        # description of this column).
        'rel',
        # the relatedness reported in the ped file.
        'pedigree_relatedness',
        # difference between the preceding 2 columns.
        'rel_difference',
        # the number of sites at which both samples were hets.
        'shared_hets',
    )
    # Columns passed through make_bool; a missing column is passed as None
    bool_columns = (
        # boolean indicating that this pair is a parent-child pair
        # according to the ped file.
        'pedigree_parents',
        # boolean indicating that this pair is expected to be a parent-child
        # pair according to the ibs0 (< 0.012) calculated from the genotypes.
        'predicted_parents',
        # boolean indicating that the preceding 2 columns do not match
        'parent_error',
        # boolean indicating that rel > 0.75 and ibs0 < 0.012
        'sample_duplication_error',
    )

    ped_check = []
    header = None
    for line in lines:
        line = line.rstrip()
        if not line:
            # Nothing to parse; an empty row would otherwise fail on lookup
            continue

        if header is None:
            # Header line
            header = line.lstrip('#').split(',')
            continue

        pair_info = dict(zip(header, line.split(',')))

        for column in number_columns:
            pair_info[column] = convert_number(pair_info[column])

        for column in bool_columns:
            pair_info[column] = make_bool(pair_info.get(column))

        ped_check.append(pair_info)

    return ped_check
Пример #10
0
def parse_peddy_ped_check(lines):
    """Parse a peddy .ped_check.csv file

    The first non-blank line is read as the comma separated header (leading
    "#" characters are stripped) and every following non-blank line as one
    pair of samples. Blank or whitespace-only lines are skipped instead of
    being treated as a data row, which would raise a ``KeyError``. Columns
    that are not converted below are kept as strings.

    Args:
        lines(iterable(str))

    Returns:
        ped_check(list(dict)): one dict per sample pair, keyed by header name

    Raises:
        KeyError: if a row lacks one of the numeric columns
    """
    # Columns passed through convert_number; each must exist in the row
    number_columns = (
        # the number of sites at which sample_a was heterozygous
        "hets_a",
        # the number of sites at which sample_b was heterozygous
        "hets_b",
        # the number of sites at which the 2 samples shared no alleles
        # (should approach 0 for parent-child pairs).
        "ibs0",
        # the number of sites at which the 2 samples were both
        # hom-ref, both het, or both hom-alt.
        "ibs2",
        # the number of sites that was used to predict the relatedness.
        "n",
        # the relatedness calculated from the genotypes (per peddy's
        # description of this column).
        "rel",
        # the relatedness reported in the ped file.
        "pedigree_relatedness",
        # difference between the preceding 2 columns.
        "rel_difference",
        # the number of sites at which both samples were hets.
        "shared_hets",
    )
    # Columns passed through make_bool; a missing column is passed as None
    bool_columns = (
        # boolean indicating that this pair is a parent-child pair
        # according to the ped file.
        "pedigree_parents",
        # boolean indicating that this pair is expected to be a parent-child
        # pair according to the ibs0 (< 0.012) calculated from the genotypes.
        "predicted_parents",
        # boolean indicating that the preceding 2 columns do not match
        "parent_error",
        # boolean indicating that rel > 0.75 and ibs0 < 0.012
        "sample_duplication_error",
    )

    ped_check = []
    header = None
    for line in lines:
        line = line.rstrip()
        if not line:
            # Nothing to parse; an empty row would otherwise fail on lookup
            continue

        if header is None:
            # Header line
            header = line.lstrip("#").split(",")
            continue

        pair_info = dict(zip(header, line.split(",")))

        for column in number_columns:
            pair_info[column] = convert_number(pair_info[column])

        for column in bool_columns:
            pair_info[column] = make_bool(pair_info.get(column))

        ped_check.append(pair_info)

    return ped_check