예제 #1
0
def calculatePrevalenceStatistic(data):
    """
    Populate a tabulated state variable with a prevalence statistic.

    Iterates over the rows yielded by generateCountList(data) and, for every
    row whose genotype passes validateGenotypes, stores the prevalence
    (genotyped count divided by the sample size) under the "prevalence" key
    of the matching entry in ``data``. ``data`` is modified in place and
    nothing is returned.

    Each row is read positionally: elements 0-3 are the four nested keys used
    to index into ``data``, element 4 is the sample size and element 5 is the
    genotyped count.
    NOTE(review): the keys are presumably (meta key, marker key, genotype,
    group label) -- confirm against generateCountList.

    The prevalence is set to 0 when either the genotyped count or the sample
    size is 0, so that no division by zero can occur.
    """
    for dataElemList in generateCountList(data):
        # Rows whose genotype does not validate (per the original notes:
        # 'genotype', 'Genotyping failure' or 'No data') get no prevalence.
        if not validateGenotypes(list(dataElemList[2])):
            continue

        sampleSize = dataElemList[4]
        markerGenotyped = float(dataElemList[5])

        # Default to 0; only divide when there is something to count AND the
        # denominator is non-zero. The sample size is the divisor, so it has
        # to be part of the guard.
        markerPrevalence = 0
        if markerGenotyped > 0 and sampleSize != 0:
            markerPrevalence = markerGenotyped / sampleSize

        data[dataElemList[0]][dataElemList[1]][dataElemList[2]][dataElemList[3]]["prevalence"] = markerPrevalence
예제 #2
0
def incrementGenotypeCount(dict, metaKey, markerKey, genotype, groups, age):
    """
    Record one observation of ``genotype`` in the state dictionary.

    Any missing level of ``dict[metaKey][markerKey][genotype]["All"]`` is
    created on demand (as an OrderedDict) and its "genotyped" counter is
    increased by one. The marker's ``"sample_size"]["All"`` counter is created
    at 0 if absent and increased by one only when validateGenotypes(genotype)
    is truthy.

    When ``groups`` is non-empty the same observation is additionally
    tallied per age group via incrementCountsByAgeGroup.
    """
    markerStats = dict.setdefault(metaKey, OrderedDict()).setdefault(markerKey, OrderedDict())

    # Make sure the "All" bucket for this genotype exists, then work out the
    # new count (it is written back after the sample size has been handled).
    allBucket = markerStats.setdefault(genotype, OrderedDict()).setdefault("All", OrderedDict())
    allBucket.setdefault("genotyped", 0)
    updatedGenotyped = allBucket["genotyped"] + 1

    # The sample size always starts out at 0 so later reads never fail.
    sampleSizes = markerStats.setdefault("sample_size", OrderedDict())
    sampleSizes.setdefault("All", 0)
    currentSampleSize = sampleSizes["All"]

    # Only genotypes that validate contribute to the sample size.
    if validateGenotypes(genotype):
        sampleSizes["All"] = currentSampleSize + 1

    allBucket["genotyped"] = updatedGenotyped

    # With age groups configured, also count this observation per group.
    if groups:
        incrementCountsByAgeGroup(dict, metaKey, markerKey, genotype, groups, age)
예제 #3
0
def incrementCountsByAgeGroup(dict, metaKey, markerKey, genotype, groups, age):
    """
    Seed every age group in the statistics dictionary and count ``age`` in
    the group it falls into.

    ``groups`` is a list of ``(lower, upper, label)`` tuples, where ``label``
    is the key used in the statistics dictionary. For every group the
    ``"sample_size"`` counter and the genotype's ``"genotyped"`` counter are
    created at 0 if they do not exist yet. Matching rules:

      * lower is None  -> age <  upper   (open-ended at the bottom)
      * upper is None  -> age >  lower   (open-ended at the top)
      * otherwise      -> lower <= age <= upper

    If several groups match, the last one in ``groups`` is used. When
    ``age`` is None or matches no group, nothing is incremented.
    """
    matchedLabel = None

    for lower, upper, label in groups:
        markerStats = dict[metaKey][markerKey]

        # Every group gets zeroed counters, whether or not this row hits it.
        markerStats["sample_size"].setdefault(label, 0)
        markerStats[genotype].setdefault(label, OrderedDict()).setdefault("genotyped", 0)

        if age is None:
            continue

        ageValue = float(age)
        if lower is None:
            inGroup = ageValue < upper
        elif upper is None:
            inGroup = ageValue > lower
        else:
            inGroup = lower <= ageValue <= upper

        if inGroup:
            matchedLabel = label

    if matchedLabel is None:
        return

    markerStats = dict[metaKey][markerKey]

    # The sample size of a group only grows for genotypes that validate
    # (per the original notes: not 'Not genotyped' / 'Genotyping failure').
    if validateGenotypes(genotype):
        markerStats["sample_size"][matchedLabel] += 1

    markerStats[genotype][matchedLabel]["genotyped"] += 1