def validate(variant):
    """Return True when every populated field of a nucleotide-level variant passes its checks.

    Blank (empty or whitespace-only) fields are skipped. The checks, in order:

    * ``genomic_ref`` must start with ``chr`` followed by digits, or with
      ``nc_`` followed by text that ``float()`` can parse.
    * ``position`` must satisfy ``check_value``.
    * ``position_intron`` requires a ``position`` and must satisfy
      ``check_intron_value``.
    * ``range_lower`` requires ``position`` to be blank and must satisfy
      ``check_value``.
    * ``range_lower_intron`` requires ``range_lower``; ``range_upper`` requires
      ``range_lower``; ``range_upper_intron`` requires ``range_upper``.
    * ``operator`` must be listed in ``ValidValues.operators``, be a
      ``X>Y`` substitution, be a run of nucleotides (a repeat), or be accepted
      by ``get_repeater_value``.
    * ``operator_value`` is only checked (with ``get_repeater_value``) when
      the operator is a nucleotide repeat.

    :param variant: object exposing the string attributes named above.
    :return: ``True`` if no check failed, ``False`` otherwise.
    """

    def has_value(text):
        # A field counts as populated only if something is left after stripping.
        return text.strip() != ''

    # genomic_ref
    genomic_ref = variant.genomic_ref
    if has_value(genomic_ref):
        lowered = genomic_ref.lower()
        if lowered.startswith('chr'):
            # NOTE(review): only numeric chromosome names pass here, so names
            # such as 'chrX' are rejected - confirm that this is intended.
            if not genomic_ref[3:].isdigit():
                return False
        elif lowered.startswith('nc_'):
            # NOTE(review): float() also accepts text such as 'inf' or '1e5'.
            try:
                float(genomic_ref[3:])
            except ValueError:
                return False
        else:
            return False

    # position: simple numeric check
    if has_value(variant.position) and not check_value(variant.position):
        return False

    # position_intron: only meaningful next to a position
    if has_value(variant.position_intron):
        if not has_value(variant.position):
            return False
        if not check_intron_value(variant.position_intron):
            return False

    # range_lower: a range and a single position are mutually exclusive
    if has_value(variant.range_lower):
        if has_value(variant.position):
            return False
        if not check_value(variant.range_lower):
            return False

    # range_lower_intron: needs range_lower
    if has_value(variant.range_lower_intron):
        if not has_value(variant.range_lower):
            return False
        if not check_intron_value(variant.range_lower_intron):
            return False

    # range_upper: needs range_lower
    if has_value(variant.range_upper):
        if not has_value(variant.range_lower):
            return False
        if not check_value(variant.range_upper):
            return False

    # range_upper_intron: needs range_upper
    if has_value(variant.range_upper_intron):
        if not has_value(variant.range_upper):
            return False
        if not check_intron_value(variant.range_upper_intron.strip()):
            return False

    # operator
    operator = variant.operator
    operator_is_repeat = False
    if has_value(operator) and operator not in ValidValues.operators:
        nucleotides = ValidValues.nucleotides
        if '>' in operator:
            # Substitution: nucleotide, '>', nucleotide. The length guard
            # keeps a truncated operator such as 'A>' from raising IndexError.
            if len(operator) < 3:
                return False
            if operator[0].lower() not in nucleotides:
                return False
            if operator[1] != '>':
                return False
            if operator[2].lower() not in nucleotides:
                return False
        elif operator[0].lower() in nucleotides:
            # Repeat unit: every character has to be a nucleotide.
            operator_is_repeat = True
            if any(base.lower() not in nucleotides for base in operator[1:]):
                return False
        elif not get_repeater_value(operator):
            # Anything else has to be a repeat range.
            return False

    # operator value: for a repeat unit it carries the repeat range
    if operator_is_repeat and variant.operator_value:
        if not get_repeater_value(variant.operator_value):
            return False

    # Survived every check.
    return True
def validate(variant):
    """Return True when the fields of a protein-level variant pass their checks.

    Protein nomenclature differs from the genomic/cDNA structure, so the
    values are stored differently: ``position``, ``range_lower`` and
    ``range_upper`` hold the amino acid, while the matching ``*_intron``
    fields hold the numeric index of that amino acid.

    A ``position`` of ``'?'`` or ``'='`` is accepted immediately. Otherwise
    each populated field is checked in turn, and a non-blank ``operator`` is
    required.

    :param variant: object exposing the string attributes ``position``,
        ``position_intron``, ``range_lower``, ``range_lower_intron``,
        ``range_upper``, ``range_upper_intron``, ``operator`` and
        ``operator_value``.
    :return: ``True`` if no check failed, ``False`` otherwise.
    """

    def has_value(text):
        # A field counts as populated only if something is left after stripping.
        return text.strip() != ''

    # position: amino acid
    if has_value(variant.position):
        if variant.position in ('?', '='):
            return True
        residue = variant.position.lower()
        if (residue not in ValidValues.amino_acids
                and residue not in ValidValues.amino_acids_single):
            return False

    # position: index of the amino acid (only valid next to a position)
    if has_value(variant.position_intron):
        if not has_value(variant.position):
            return False
        if not check_numeric_value(variant.position_intron):
            return False

    # range_lower: amino acid; a range and a single position are exclusive
    if has_value(variant.range_lower):
        if has_value(variant.position):
            return False
        if variant.range_lower.lower() not in ValidValues.amino_acids:
            return False

    # range_lower: index of the amino acid
    if has_value(variant.range_lower_intron):
        if not has_value(variant.range_lower):
            return False
        if not check_numeric_value(variant.range_lower_intron):
            return False

    # range_upper: amino acid; needs range_lower
    if has_value(variant.range_upper):
        if not has_value(variant.range_lower):
            return False
        if variant.range_upper.lower() not in ValidValues.amino_acids:
            return False

    # range_upper: index of the amino acid
    if has_value(variant.range_upper_intron):
        if not has_value(variant.range_upper):
            return False
        if not check_numeric_value(variant.range_upper_intron):
            return False

    # Operator
    # NOTE(review): a blank operator is treated as invalid here - confirm
    # against the original layout of this check.
    operator = variant.operator
    if not has_value(operator):
        return False
    if operator[0] in ('(', '['):
        # repeating range
        if not get_repeater_value(operator):
            return False
    elif operator.lower() not in ValidValues.protein_operators:
        # Not a listed protein operator, so it has to be an amino acid code.
        code = operator.lower()
        if (code not in ValidValues.amino_acids
                and code not in ValidValues.amino_acids_single):
            return False

    # Operator value
    operator_value = variant.operator_value
    if not has_value(operator_value):
        return True

    # frameshifts: 'fs', optionally followed by '*' or 'x' and a digit run
    if operator_value[0:2] == 'fs':
        if len(operator_value) > 2:
            if operator_value[2].lower() not in ('*', 'x'):
                return False
            if '];[' in operator_value[3:]:
                # Only the text in front of '];[' is checked, and it may be empty.
                separator = operator_value.index('];[')
                digits = operator_value[3:separator]
                if digits != '' and not digits.isdigit():
                    return False
            elif not operator_value[3:].isdigit():
                return False
        return True

    # Values containing ']' or ')' are not checked any further.
    if ']' in operator_value or ')' in operator_value:
        return True

    # NOTE(review): the amino acid check below only runs when the operator is
    # NOT in ValidValues.protein_operators - confirm that this is intended.
    if operator.lower() in ValidValues.protein_operators:
        return True

    # Amino acid codes are 3 characters long, so the length must be a
    # multiple of 3.
    if len(operator_value) % 3 != 0:
        return False

    # Every chunk has to be a known amino acid. `split` is a helper defined
    # elsewhere - presumably it cuts the text into 3-character pieces.
    return all(
        code in ValidValues.amino_acids
        for code in split(operator_value.lower(), 3)
    )
def validate(variant):
    """Return True when the fields of a protein-level variant pass their checks.

    Protein nomenclature differs from the genomic/cDNA structure, so the
    values are stored differently: ``position``, ``range_lower`` and
    ``range_upper`` hold the amino acid, while the matching ``*_intron``
    fields hold the numeric index of that amino acid.

    A ``position`` of ``'?'`` or ``'='`` is accepted immediately. Otherwise
    each populated field is checked in turn, and a non-blank ``operator`` is
    required.

    :param variant: object exposing the string attributes ``position``,
        ``position_intron``, ``range_lower``, ``range_lower_intron``,
        ``range_upper``, ``range_upper_intron``, ``operator`` and
        ``operator_value``.
    :return: ``True`` if no check failed, ``False`` otherwise.
    """

    def has_value(text):
        # A field counts as populated only if something is left after stripping.
        return text.strip() != ''

    # position: amino acid
    if has_value(variant.position):
        if variant.position in ('?', '='):
            return True
        residue = variant.position.lower()
        if (residue not in ValidValues.amino_acids
                and residue not in ValidValues.amino_acids_single):
            return False

    # position: index of the amino acid (only valid next to a position)
    if has_value(variant.position_intron):
        if not has_value(variant.position):
            return False
        if not check_numeric_value(variant.position_intron):
            return False

    # range_lower: amino acid; a range and a single position are exclusive
    if has_value(variant.range_lower):
        if has_value(variant.position):
            return False
        if variant.range_lower.lower() not in ValidValues.amino_acids:
            return False

    # range_lower: index of the amino acid
    if has_value(variant.range_lower_intron):
        if not has_value(variant.range_lower):
            return False
        if not check_numeric_value(variant.range_lower_intron):
            return False

    # range_upper: amino acid; needs range_lower
    if has_value(variant.range_upper):
        if not has_value(variant.range_lower):
            return False
        if variant.range_upper.lower() not in ValidValues.amino_acids:
            return False

    # range_upper: index of the amino acid
    if has_value(variant.range_upper_intron):
        if not has_value(variant.range_upper):
            return False
        if not check_numeric_value(variant.range_upper_intron):
            return False

    # Operator
    # NOTE(review): a blank operator is treated as invalid here - confirm
    # against the original layout of this check.
    operator = variant.operator
    if not has_value(operator):
        return False
    if operator[0] in ('(', '['):
        # repeating range
        if not get_repeater_value(operator):
            return False
    elif operator.lower() not in ValidValues.protein_operators:
        # Not a listed protein operator, so it has to be an amino acid code.
        code = operator.lower()
        if (code not in ValidValues.amino_acids
                and code not in ValidValues.amino_acids_single):
            return False

    # Operator value
    operator_value = variant.operator_value
    if not has_value(operator_value):
        return True

    # frameshifts: 'fs', optionally followed by '*' or 'x' and a digit run
    if operator_value[0:2] == 'fs':
        if len(operator_value) > 2:
            if operator_value[2].lower() not in ('*', 'x'):
                return False
            if '];[' in operator_value[3:]:
                # Only the text in front of '];[' is checked, and it may be empty.
                separator = operator_value.index('];[')
                digits = operator_value[3:separator]
                if digits != '' and not digits.isdigit():
                    return False
            elif not operator_value[3:].isdigit():
                return False
        return True

    # Values containing ']' or ')' are not checked any further.
    if ']' in operator_value or ')' in operator_value:
        return True

    # NOTE(review): the amino acid check below only runs when the operator is
    # NOT in ValidValues.protein_operators - confirm that this is intended.
    if operator.lower() in ValidValues.protein_operators:
        return True

    # Amino acid codes are 3 characters long, so the length must be a
    # multiple of 3.
    if len(operator_value) % 3 != 0:
        return False

    # Every chunk has to be a known amino acid. `split` is a helper defined
    # elsewhere - presumably it cuts the text into 3-character pieces.
    return all(
        code in ValidValues.amino_acids
        for code in split(operator_value.lower(), 3)
    )