def check_if_there_is_allele_with_4_diff_values(fam_d, alleles_names, invalid_cases):
    """
    Reject a parent-less family in which no allele shows 4 different values.

    When there are no parents, the algorithm relies on the assumption that at
    least one allele has 4 different values among the children. A family
    without such an allele is not "invalid" input, but it is rejected because
    it cannot be analyzed.

    :param fam_d: family dict (children only, according to the condition at
        the call to this function)
    :param alleles_names: names of the alleles to inspect
    :param invalid_cases: list that receives ('6', 'All') when no allele has
        exactly 4 different values
    """
    for locus in alleles_names:
        distinct_values = Als()
        for child_data in fam_d.values():
            child_values = child_data[locus]
            if not any(child_values):
                # nothing typed for this child in this allele
                continue
            # Fold this child's values into what the other children contributed,
            # e.g. [02, 03] merged with [02:01, 04] gives 3 values.
            # The merge may keep the low-res value (02 rather than 02:01);
            # that is fine because only the number of values matters here.
            distinct_values = distinct_values.merge(child_values)

        # all children were visited for this allele: one hit is enough
        if len(distinct_values) == 4:
            return

    # no parents and no allele with 4 different values (algorithm cannot be executed)
    invalid_cases.append(('6', 'All'))
def __init__(self, alleles_names):
    """
    Create the two haplotypes as dicts that map every allele name to an empty
    Als (the special list used for allele values): {A: [], B: [], ...}

    :param alleles_names: names of the alleles to create entries for
    """
    # Materialize once so both haplotypes are built from the same names.
    names = list(alleles_names)
    # Each entry gets its own fresh Als; the haplotypes share no objects.
    self.hap1 = {name: Als() for name in names}
    self.hap2 = {name: Als() for name in names}
def create_merged_allele_values(children, al_name):
    """
    Collect, into one Als, all the different values the children carry for a
    specific allele.

    For example: if child 1 has A: [02:01, 03:04], child 2 has A: [05, 03] and
    child 3 has A: [02, 07], the result is [02:01, 03:04, 05, 07].

    :param children: children dict
    :param al_name: name of the specific allele
    :return: Als holding the merged values
    """
    merged = Als()
    for child_data in children.values():
        # merge() returns the combined Als, so rebind on every step
        merged = merged.merge(child_data[al_name])
    return merged
def check_too_much_alleles(fam_d, alleles_names, invalid_cases):
    """
    Report a family that has too many values in an allele (more than 4).

    :param fam_d: family dict, keyed by family member (F, M, 1, ...)
    :param alleles_names: names of the alleles to inspect
    :param invalid_cases: list that receives ('4', 'All') once for every
        allele in which more than 4 values were found
    """
    for locus in alleles_names:
        family_values = Als()
        for member_data in fam_d.values():  # F, M, 1 ...
            member_values = member_data[locus]
            if not any(member_values):
                # empty typing adds nothing to the count
                continue
            family_values = family_values.merge(member_values)

        if len(family_values) > 4:
            invalid_cases.append(('4', 'All'))  # too many alleles
def convert_data_to_Als(fam_dict):
    """
    Convert, in place, the allele data of a whole family from plain lists to
    Als (list-like, adjusted to alleles; see the 'Als' class documentation).

    Only the first two entries of each allele list are carried over.

    :param fam_dict: family dict: member (F, M, 1 ...) -> allele name (A, B ...)
        -> the two allele values
    """
    for member_data in fam_dict.values():  # F, M, 1 ...
        for allele_name, raw_values in member_data.items():  # A, B ...
            first, second = raw_values[0], raw_values[1]
            converted = Als()
            converted.extend([first, second])
            # Rebinding an existing key does not resize the dict, so it is
            # safe to do while iterating over it.
            member_data[allele_name] = converted
def remove_data_if_just_one_allele_full(h1, h2):
    """
    Clear an allele that has data in one haplotype but not in the other.

    GRIMM cannot handle an allele that is typed in one haplotype and empty in
    the second, for example A*02:01+A*ZZZZ (ZZZZ means empty). For such an
    allele the data is deleted from the haplotype that holds it, so the allele
    ends up empty in both.

    Pay attention: this function is called after 'duplicate_hap_if_one_empty',
    because otherwise we might delete all the data from the haplotypes.

    The haplotypes are matched by allele name, so the result does not depend
    on the order in which the two dicts were filled. An allele name that is
    present in only one of the dicts is left untouched.

    :param h1: first haplotype (allele name -> Als), modified in place
    :param h2: second haplotype (allele name -> Als), modified in place
    """
    for al_name, value1 in h1.items():
        if al_name not in h2:
            continue
        is_empty1 = value1.empty_Als()
        is_empty2 = h2[al_name].empty_Als()
        # Rebinding an existing key keeps the dict size, so assigning while
        # iterating over h1 is safe.
        if is_empty1 and not is_empty2:
            h2[al_name] = Als()
        elif is_empty2 and not is_empty1:
            h1[al_name] = Als()
def validate(hap_1, hap_2, member, is_serology):  # todo: check it !!
    """
    Check that two haplotypes are consistent with a person's typing
    (2 haplotypes of one parent when comparing to a parent, 1 haplotype from
    each parent when comparing to a child).

    :param hap_1: first haplotype (GL string)
    :param hap_2: second haplotype (GL string)
    :param member: family member: allele name -> allele values
    :param is_serology: flag; serology data is validated in another way
    :return: True if consistent, False otherwise
    """
    hap_1 = gl_string_to_dict(hap_1)
    hap_2 = gl_string_to_dict(hap_2)

    if is_serology:
        # serology data: there could be more than 2 options in 'member[allele_name]'
        locus_consistent = []
        for allele_name, allele_values in member.items():
            from_hap1, from_hap2 = hap_1[allele_name], hap_2[allele_name]
            options = Als()
            options.extend(allele_values)
            # The first two conditions cover [] and ["", ""]; otherwise both
            # haplotype values must appear among the member's options.
            locus_consistent.append(
                not options
                or not any(options)
                or (from_hap1 in options and from_hap2 in options)
            )
        # every allele is examined before the verdict is given
        return all(locus_consistent)

    for allele_name, allele_values in member.items():
        typed_1, typed_2 = allele_values[0], allele_values[1]
        from_hap1, from_hap2 = hap_1[allele_name], hap_2[allele_name]
        # The member's two values may match the haplotypes in either order.
        same_order = is_equal(typed_1, from_hap1) and is_equal(typed_2, from_hap2)
        swapped = is_equal(typed_1, from_hap2) and is_equal(typed_2, from_hap1)
        if not same_order and not swapped:
            return False
    return True
def divide_alleles_to_2_groups(dict_children_one_allele):
    """
    Divide the alleles of 3 children or more into 2 groups (one for each parent).

    For example, c1:[01, 02], c2:[02, 03], c3:[01, 04], so -> par1: 01~03, par2: 02~04

    There are 2 cases:

    1. Easy case: there is a homozygous child. His allele is put in both
       groups, and then the alleles of the other children are inserted in some
       order (no matter how) into the groups, until each group is of size 2.
       For example: c1:[01, 01], c2:[01, 02], c3:[02, 03]
       -->(iter1) par1: 01, par2: 01
       -->(iter2) par1: 01~02, par2: 01
       -->(iter3) par1: 01~02, par2: 01~03
    2. Difficult case: no homozygous child. The alleles of the first child are
       added (one to each group), and then, for the other children,
       'divide_2_alleles_to_non_empty_groups' is called.

    :param dict_children_one_allele: children dict that contains data about one allele only
    :return: 2 groups (a tuple of two Als)
    """
    # NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
    # source line; confirm it against the original layout, in particular that
    # the 'try_again' retry runs once, after the loop over the children.
    gr1, gr2 = Als(), Als()
    is_homoz, homoz_allele = check_if_exist_homoz(dict_children_one_allele)
    if is_homoz:  # if a child has [01, 01], each parent has '01'
        gr1.append(homoz_allele)
        gr2.append(homoz_allele)
        for alleles in dict_children_one_allele.values():
            for al in alleles:
                # only values not yet placed in any group are inserted
                if al not in gr1 and al not in gr2:
                    # fill the first group up to 2 values, then the second
                    if len(gr1) < 2:
                        gr1.append(al)
                    elif len(gr2) < 2:
                        gr2.append(al)
            if len(gr1) == len(gr2) == 2:  # the groups are full
                break
    else:  # no homozygous
        # 'try_again' lets us know if an allele pair did not succeed to be
        # inserted into the groups the first time, so after inserting the
        # others we try to insert it again. More explanations are in the
        # documentation of 'divide_2_alleles_to_non_empty_groups'.
        try_again = []
        for alleles in dict_children_one_allele.values():
            if len(gr1) == len(gr2) == 0:  # first insertion, the order does not matter
                gr1.append(alleles[0])
                gr2.append(alleles[1])
            else:
                divide_2_alleles_to_non_empty_groups(gr1, gr2, alleles, try_again)
        if len(try_again) > 0:
            # only the first postponed pair is retried; a fresh list is passed
            # as the retry collector
            divide_2_alleles_to_non_empty_groups(gr1, gr2, try_again[0], [])
    return gr1, gr2