def add_leading_base(chr, pos, ref, alt, seq_provider):
    """Prefix a reference base onto a variant whose ref or alt allele is empty.

    Exactly one of ``ref`` / ``alt`` is expected to be empty (as judged by
    ``utilities.isEmpty``); the empty allele is treated as ``""``.

    Args:
        chr: Chromosome identifier; converted with ``int()`` for the lookup
            and returned unchanged.
        pos: Variant position; converted with ``int()``.
        ref: Reference allele, possibly empty.
        alt: Alternate allele, possibly empty.
        seq_provider: Object exposing ``get_seq``; it is called as
            ``get_seq(int(chr), pos - 1, 2)``.
            NOTE(review): presumably (chromosome, start, length) -- confirm.

    Returns:
        A tuple ``(chr, position_as_str, new_ref, new_alt)``. When ``ref`` is
        empty the position is kept and the second fetched base is used; when
        ``alt`` is empty the position is shifted back by one and the first
        fetched base is used.

    Raises:
        Exception: If both alleles are empty, or if neither is.
    """
    pos = int(pos)

    ref_missing = bool(utilities.isEmpty(ref))
    if ref_missing:
        ref = ""
    alt_missing = bool(utilities.isEmpty(alt))
    if alt_missing:
        alt = ""

    # The sequence is fetched before validation, so lookup errors surface
    # ahead of the allele checks below.
    window = seq_provider.get_seq(int(chr), pos - 1, 2)

    if ref_missing and alt_missing:
        raise Exception("both ref and alt are empty")
    if not (ref_missing or alt_missing):
        raise Exception("add leading base called but both ref and alt were provided!")

    if ref_missing:
        # Empty ref: anchor on the base at the variant position itself.
        anchor, anchor_pos = window[1], pos
    else:
        # Empty alt: anchor on the base just before the deleted stretch.
        anchor, anchor_pos = window[0], pos - 1

    return (chr, str(anchor_pos), anchor + ref, anchor + alt)
def determineGnomADAlleleFrequency(row):
    """Compute the combined genome + exome gnomAD allele frequency for a row.

    Args:
        row: Mapping indexed by the gnomAD column names used below.

    Returns:
        ``EMPTY`` when both allele-frequency columns are empty or when the
        summed allele number is zero; otherwise the summed allele count
        divided by the summed allele number, passed through
        ``round_sigfigs(..., 4)``.
    """
    if isEmpty(row['Allele_frequency_genome_GnomAD']) and isEmpty(row['Allele_frequency_exome_GnomAD']):
        return EMPTY

    # Column order matters only for which conversion error would surface first.
    count_columns = (
        'Allele_count_genome_GnomAD',
        'Allele_number_genome_GnomAD',
        'Allele_count_exome_GnomAD',
        'Allele_number_exome_GnomAD',
    )
    genome_count, genome_number, exome_count, exome_number = [
        getNumericAFValue(row[column]) for column in count_columns
    ]

    total_number = genome_number + exome_number
    if total_number == 0:
        # Nothing was genotyped in either dataset; avoid dividing by zero.
        return EMPTY

    total_count = genome_count + exome_count
    return round_sigfigs((total_count / total_number), 4)
def prepare_variant_for_removal_and_log(original_hgvs, normalized_hgvs, items, bx_ids_for_variant,
                                        reason_for_discard, variants_to_remove):
    """Log why a variant is being discarded and queue it for removal.

    Args:
        original_hgvs: Identifier appended to ``variants_to_remove``.
        normalized_hgvs: Identifier used in the log messages.
        items: Original variant representation; logged via ``str()``.
        bx_ids_for_variant: Mapping from ``BX_ID_<source>`` keys to the
            reports for that source.
        reason_for_discard: Reason string; selects the warning wording and is
            forwarded to ``log_discarded_reports``.
        variants_to_remove: List that is appended to in place.

    Returns:
        The same ``variants_to_remove`` list, with ``original_hgvs`` appended.
    """
    if reason_for_discard == "Incorrect Reference":
        headline = "Ref incorrect using %s"
        detail = "Original variant representation of incorrect ref variant before add_leading_base: %s"
    elif reason_for_discard == "Variant ref and alt are the same":
        headline = "Variant ref and alt are the same for variant %s"
        detail = "Original variant representation: %s"
    else:
        headline = "Bad data for variant: %s"
        detail = "Original variant representation: %s"

    logging.warning(headline, normalized_hgvs)
    logging.warning(detail, str(items))

    # Keys look like "BX_ID_<source>"; slicing off the prefix length leaves
    # the source name.
    source_offset = len("BX_ID_")
    for bx_key in bx_ids_for_variant.keys():
        source_reports = bx_ids_for_variant[bx_key]
        if not utilities.isEmpty(source_reports):
            log_discarded_reports(bx_key[source_offset:], source_reports,
                                  normalized_hgvs, reason_for_discard)

    variants_to_remove.append(original_hgvs)
    return variants_to_remove
def seekPattern(self, id, textArray, seekFirstMatch = True):
    """Collect case-insensitive regex matches from a list of text lines.

    ``self.base.getMethod(id)`` supplies an iterable of "algorithms", each an
    iterable of regex patterns. Every pattern of an algorithm is searched
    against every line of ``textArray``; the matched text of each hit is
    collected.

    Args:
        id: Key passed to ``self.base.getMethod``.
        textArray: Iterable of strings to search.
        seekFirstMatch: When true, stop after the first algorithm that
            produced at least one match.

    Returns:
        List of matched substrings, in the order they were found.
    """
    # NOTE(review): the break is placed at the per-algorithm level, matching
    # the "next alg" debug note in the original -- confirm against history.
    collected = []
    for algorithm in self.base.getMethod(id):
        for expression in algorithm:
            for text in textArray:
                hit = re.search(expression, text, flags=re.IGNORECASE)
                if utilities.hasValue(hit):
                    collected.append(hit.group())
        if seekFirstMatch and not utilities.isEmpty(collected):
            break
    return collected
def getNumericAFValue(value):
    """Convert an allele count/number field to a number.

    Returns ``0`` when ``isEmpty(value)`` is true, otherwise ``float(value)``.
    """
    return 0 if isEmpty(value) else float(value)