Exemplo n.º 1
0
def validate(variant):
    """Check the fields of a genomic/cDNA variant name for consistency.

    Empty (or whitespace-only) fields are skipped. Populated fields are
    checked in the order below and the first failure ends validation.

    * ``genomic_ref`` must start with ``chr`` followed only by digits, or
      with ``nc_`` followed by text that ``float()`` can parse. The prefix
      is matched case-insensitively.
    * ``position`` must pass ``check_value``.
    * ``position_intron`` requires ``position`` and must pass
      ``check_intron_value``.
    * ``range_lower`` is only allowed when ``position`` is empty and must
      pass ``check_value``.
    * ``range_lower_intron`` requires ``range_lower`` and must pass
      ``check_intron_value``.
    * ``range_upper`` requires ``range_lower`` and must pass ``check_value``.
    * ``range_upper_intron`` requires ``range_upper`` and must pass
      ``check_intron_value`` (it is passed stripped).
    * ``operator`` must be listed in ``ValidValues.operators``, or be a
      substitution of the form ``X>Y``, or consist only of nucleotides
      (treated as a repeat unit), or pass ``get_repeater_value``.
    * When the operator is a nucleotide run, a non-empty ``operator_value``
      must pass ``get_repeater_value``.

    Args:
        variant: object exposing the string attributes named above.

    Returns:
        bool: True when every populated field passes, False otherwise.
    """
    # genomic_ref
    genomic_ref = variant.genomic_ref
    if genomic_ref.strip() != '':
        lowered_ref = genomic_ref.lower()
        if lowered_ref.startswith('chr'):
            # only digits may follow 'chr'
            if not genomic_ref[3:].isdigit():
                return False
        elif lowered_ref.startswith('nc_'):
            # whatever follows 'nc_' has to parse as a number
            try:
                float(genomic_ref[3:])
            except ValueError:
                return False
        else:
            return False

    # position
    has_position = variant.position.strip() != ''
    if has_position and not check_value(variant.position):
        return False

    # position_intron: only allowed together with a position.
    # A whitespace-only position counts as empty, like everywhere else here.
    if variant.position_intron.strip() != '':
        if not has_position:
            return False
        if not check_intron_value(variant.position_intron):
            return False

    # range_lower: mutually exclusive with position
    has_range_lower = variant.range_lower.strip() != ''
    if has_range_lower:
        if has_position:
            return False
        if not check_value(variant.range_lower):
            return False

    # range_lower_intron: only allowed together with range_lower
    if variant.range_lower_intron.strip() != '':
        if not has_range_lower:
            return False
        if not check_intron_value(variant.range_lower_intron):
            return False

    # range_upper: only allowed together with range_lower
    has_range_upper = variant.range_upper.strip() != ''
    if has_range_upper:
        if not has_range_lower:
            return False
        if not check_value(variant.range_upper):
            return False

    # range_upper_intron: only allowed together with range_upper
    if variant.range_upper_intron.strip() != '':
        if not has_range_upper:
            return False
        if not check_intron_value(variant.range_upper_intron.strip()):
            return False

    # operator
    operator = variant.operator
    operator_is_repeat = False
    if operator.strip() != '' and operator not in ValidValues.operators:
        nucleotides = ValidValues.nucleotides
        if '>' in operator:
            # Substitution: nucleotide, '>', nucleotide. Only the first three
            # characters are inspected; anything shorter cannot match and is
            # rejected instead of raising IndexError.
            if len(operator) < 3:
                return False
            if operator[0].lower() not in nucleotides:
                return False
            if operator[1] != '>':
                return False
            if operator[2].lower() not in nucleotides:
                return False
        elif operator[0].lower() in nucleotides:
            # Repeat unit: every character has to be a nucleotide.
            operator_is_repeat = True
            if not all(base.lower() in nucleotides for base in operator[1:]):
                return False
        elif not get_repeater_value(operator):
            # anything else must be a repeater range value
            return False

    # operator value: for a repeat unit the value carries the repeat range
    if operator_is_repeat and variant.operator_value:
        if not get_repeater_value(variant.operator_value):
            return False

    # if it survived the checks then return true
    return True
Exemplo n.º 2
0
def validate(variant):
    """Check the fields of a protein-level variant name for consistency.

    Protein nomenclature differs from the genomic/cDNA structure, so the
    values are stored differently in the VariantName class: ``position``,
    ``range_lower`` and ``range_upper`` hold the amino acid, while the
    matching ``*_intron`` fields hold the numeric index of that amino acid.

    Checks, in order (the first failure ends validation):

    * ``position`` of ``'?'`` or ``'='`` is accepted immediately and nothing
      else is inspected. Any other non-empty position must be listed in
      ``ValidValues.amino_acids`` or ``ValidValues.amino_acids_single``.
    * ``position_intron`` requires ``position`` and must pass
      ``check_numeric_value``.
    * ``range_lower`` is only allowed when ``position`` is empty and must be
      in ``ValidValues.amino_acids``.
    * ``range_lower_intron`` requires ``range_lower``; ``range_upper``
      requires ``range_lower``; ``range_upper_intron`` requires
      ``range_upper``. Amino acids are looked up in
      ``ValidValues.amino_acids``; indexes go through ``check_numeric_value``.
    * ``operator`` is mandatory. One starting with ``(`` or ``[`` must pass
      ``get_repeater_value``. Otherwise it must be in
      ``ValidValues.protein_operators`` or be an amino acid.
    * ``operator_value`` starting with ``fs`` is checked as a frameshift.
      Other values are checked as a run of three-letter amino acid codes.

    Args:
        variant: object exposing the string attributes named above.

    Returns:
        bool: True when the variant passes every applicable check.
    """
    # position: amino acid
    position = variant.position
    has_position = position.strip() != ''
    if has_position:
        if position in ('?', '='):
            # unknown / unchanged: accepted without looking at other fields
            return True
        if (position.lower() not in ValidValues.amino_acids
                and position.lower() not in ValidValues.amino_acids_single):
            return False

    # position: index of the amino acid (only allowed alongside a position)
    if variant.position_intron.strip() != '':
        if not has_position:
            return False
        if not check_numeric_value(variant.position_intron):
            return False

    # range_lower: amino acid (mutually exclusive with position)
    has_range_lower = variant.range_lower.strip() != ''
    if has_range_lower:
        if has_position:
            return False
        if variant.range_lower.lower() not in ValidValues.amino_acids:
            return False

    # range_lower: index of the amino acid
    if variant.range_lower_intron.strip() != '':
        if not has_range_lower:
            return False
        if not check_numeric_value(variant.range_lower_intron):
            return False

    # range_upper: amino acid (only allowed alongside range_lower)
    has_range_upper = variant.range_upper.strip() != ''
    if has_range_upper:
        if not has_range_lower:
            return False
        if variant.range_upper.lower() not in ValidValues.amino_acids:
            return False

    # range_upper: index of the amino acid
    if variant.range_upper_intron.strip() != '':
        if not has_range_upper:
            return False
        if not check_numeric_value(variant.range_upper_intron):
            return False

    # Operator: mandatory
    operator = variant.operator
    if operator.strip() == '':
        return False
    if operator[0] in ('(', '['):
        # repeating range
        if not get_repeater_value(operator):
            return False
    elif operator.lower() not in ValidValues.protein_operators:
        # not a known protein operator, so it has to be an amino acid
        if (operator.lower() not in ValidValues.amino_acids
                and operator.lower() not in ValidValues.amino_acids_single):
            return False

    # Operator value
    value = variant.operator_value
    if value.strip() != '':
        if value[0:2] == 'fs':
            # Frameshift: a bare 'fs' is fine; otherwise '*' or 'x' must
            # follow, then digits (optionally cut short by a '];[').
            if len(value) > 2:
                if value[2].lower() not in ('*', 'x'):
                    return False
                if '];[' in value[3:]:
                    digits = value[3:value.index('];[')]
                    if digits != '' and not digits.isdigit():
                        return False
                elif not value[3:].isdigit():
                    return False
        elif ']' not in value and ')' not in value:
            # values containing ']' or ')' are not inspected here
            # NOTE(review): the amino-acid check runs only when the operator
            # is NOT in ValidValues.protein_operators. The original comments
            # tied it to indel/insertion operators; confirm which is intended.
            if operator.lower() not in ValidValues.protein_operators:
                # Amino acid codes are 3 characters long, so the length must
                # be a multiple of 3. The original test was inverted and
                # rejected exactly the well-formed lengths.
                if len(value) % 3 != 0:
                    return False
                # every 3-character chunk has to be a known amino acid
                for amino_acid in split(value.lower(), 3):
                    if amino_acid not in ValidValues.amino_acids:
                        return False

    return True
def validate(variant):
    """Check the fields of a protein-level variant name for consistency.

    Protein nomenclature differs from the genomic/cDNA structure, so the
    values are stored differently in the VariantName class: ``position``,
    ``range_lower`` and ``range_upper`` hold the amino acid, while the
    matching ``*_intron`` fields hold the numeric index of that amino acid.

    Checks, in order (the first failure ends validation):

    * ``position`` of ``'?'`` or ``'='`` is accepted immediately and nothing
      else is inspected. Any other non-empty position must be listed in
      ``ValidValues.amino_acids`` or ``ValidValues.amino_acids_single``.
    * ``position_intron`` requires ``position`` and must pass
      ``check_numeric_value``.
    * ``range_lower`` is only allowed when ``position`` is empty and must be
      in ``ValidValues.amino_acids``.
    * ``range_lower_intron`` requires ``range_lower``; ``range_upper``
      requires ``range_lower``; ``range_upper_intron`` requires
      ``range_upper``. Amino acids are looked up in
      ``ValidValues.amino_acids``; indexes go through ``check_numeric_value``.
    * ``operator`` is mandatory. One starting with ``(`` or ``[`` must pass
      ``get_repeater_value``. Otherwise it must be in
      ``ValidValues.protein_operators`` or be an amino acid.
    * ``operator_value`` starting with ``fs`` is checked as a frameshift.
      Other values are checked as a run of three-letter amino acid codes.

    Args:
        variant: object exposing the string attributes named above.

    Returns:
        bool: True when the variant passes every applicable check.
    """
    # position: amino acid
    position = variant.position
    has_position = position.strip() != ''
    if has_position:
        if position in ('?', '='):
            # unknown / unchanged: accepted without looking at other fields
            return True
        if (position.lower() not in ValidValues.amino_acids
                and position.lower() not in ValidValues.amino_acids_single):
            return False

    # position: index of the amino acid (only allowed alongside a position)
    if variant.position_intron.strip() != '':
        if not has_position:
            return False
        if not check_numeric_value(variant.position_intron):
            return False

    # range_lower: amino acid (mutually exclusive with position)
    has_range_lower = variant.range_lower.strip() != ''
    if has_range_lower:
        if has_position:
            return False
        if variant.range_lower.lower() not in ValidValues.amino_acids:
            return False

    # range_lower: index of the amino acid
    if variant.range_lower_intron.strip() != '':
        if not has_range_lower:
            return False
        if not check_numeric_value(variant.range_lower_intron):
            return False

    # range_upper: amino acid (only allowed alongside range_lower)
    has_range_upper = variant.range_upper.strip() != ''
    if has_range_upper:
        if not has_range_lower:
            return False
        if variant.range_upper.lower() not in ValidValues.amino_acids:
            return False

    # range_upper: index of the amino acid
    if variant.range_upper_intron.strip() != '':
        if not has_range_upper:
            return False
        if not check_numeric_value(variant.range_upper_intron):
            return False

    # Operator: mandatory
    operator = variant.operator
    if operator.strip() == '':
        return False
    if operator[0] in ('(', '['):
        # repeating range
        if not get_repeater_value(operator):
            return False
    elif operator.lower() not in ValidValues.protein_operators:
        # not a known protein operator, so it has to be an amino acid
        if (operator.lower() not in ValidValues.amino_acids
                and operator.lower() not in ValidValues.amino_acids_single):
            return False

    # Operator value
    value = variant.operator_value
    if value.strip() != '':
        if value[0:2] == 'fs':
            # Frameshift: a bare 'fs' is fine; otherwise '*' or 'x' must
            # follow, then digits (optionally cut short by a '];[').
            if len(value) > 2:
                if value[2].lower() not in ('*', 'x'):
                    return False
                if '];[' in value[3:]:
                    digits = value[3:value.index('];[')]
                    if digits != '' and not digits.isdigit():
                        return False
                elif not value[3:].isdigit():
                    return False
        elif ']' not in value and ')' not in value:
            # values containing ']' or ')' are not inspected here
            # NOTE(review): the amino-acid check runs only when the operator
            # is NOT in ValidValues.protein_operators. The original comments
            # tied it to indel/insertion operators; confirm which is intended.
            if operator.lower() not in ValidValues.protein_operators:
                # Amino acid codes are 3 characters long, so the length must
                # be a multiple of 3. The original test was inverted and
                # rejected exactly the well-formed lengths.
                if len(value) % 3 != 0:
                    return False
                # every 3-character chunk has to be a known amino acid
                for amino_acid in split(value.lower(), 3):
                    if amino_acid not in ValidValues.amino_acids:
                        return False

    return True