Пример #1
0
def get_mean_length(values):
    """ Returns the mean (average) key length of a frequency distribution.

        Each key's length is weighted by its number of occurrences.
        Unknown keys are skipped, as are entries that raise a ValueError
        while being measured or counted.

        Inputs:
          - values - a dictionary of frequency distribution: the keys are
            the strings being measured, the data is the number of
            occurrences of each key (anything int() accepts).
        Outputs:
          - a single value - the weighted mean length, or None if no
            usable entries were found.
    """
    total_length = 0
    total_occurrences = 0

    for key in values:
        if field_type.is_unknown(key):
            continue
        try:
            key_length = len(key)
            occurrences = int(values[key])
        except ValueError:
            # usually 'unknown values', sometimes garbage
            continue
        total_length += key_length * occurrences
        total_occurrences += occurrences

    # nothing usable was counted, so there is no mean to report
    if total_occurrences == 0:
        return None
    return total_length / total_occurrences
Пример #2
0
    def get_known_values(self, fieldno):
        """ Returns the values of self.field_freqs[fieldno] with the
            unknown ones removed.

            The result is a list of the values themselves (the keys when
            the field's frequencies are held in a dictionary); the
            frequency counts are not included.
        """
        known = []
        for candidate in self.field_freqs[fieldno]:
            # keep only values explicitly reported as not unknown
            if typer.is_unknown(candidate) is False:
                known.append(candidate)
        return known
Пример #3
0
def get_mean_length(values):
    """ Returns the mean (average) length of the keys of the input, with
        every key weighted by how many times it occurred.

        Inputs:
          - values - a dictionary of frequency distribution: string keys
            with numeric data representing the occurrences of each key.
            Unknown keys and entries raising a ValueError are ignored.
        Outputs:
          - a single value - the mean of the inputs, or None if nothing
            besides unknown or invalid entries was found.
    """
    # (key length, occurrences) for every usable entry
    weighted = []
    for key in values:
        if field_type.is_unknown(key):
            continue
        try:
            weighted.append((len(key), int(values[key])))
        except ValueError:
            pass  # usually 'unknown values', sometimes garbage

    occurrences = sum(freq for _, freq in weighted)
    if not occurrences:
        return None
    return sum(length * freq for length, freq in weighted) / occurrences
Пример #4
0
def get_max(value_type, values):
    """ Returns the maximum value of the input, as a string.

        Unknown values are ignored, and so are values that cannot be
        converted to the requested numeric type.

        Inputs:
          - value_type - one of integer, float, string, timestamp,
            unknown or None.  Integer and float values are converted
            before being compared; all others are compared as given.
          - values - dictionary or list of string values
        Outputs:
          - str() of the single maximum value, or None if no usable
            values were found.
    """
    assert value_type in ['integer', 'float', 'string', 'timestamp', 'unknown', None]

    # pick the conversion once rather than once per value
    if value_type == 'integer':
        convert = int
    elif value_type == 'float':
        convert = float
    else:
        convert = None

    candidates = []
    for raw in values:
        if typer.is_unknown(raw):
            continue
        try:
            candidates.append(raw if convert is None else convert(raw))
        except ValueError:
            continue                       # ignore invalid values

    try:
        return str(max(candidates))
    except ValueError:
        # max() of an empty list: nothing usable was supplied
        return None
Пример #5
0
    def get_known_values(self, fieldno):
        """ Returns a list of the values found in
            self.field_freqs[fieldno], leaving out every value that is
            not explicitly reported as known.  For a dictionary of
            frequencies only the keys are returned, not the counts.
        """
        field_values = self.field_freqs[fieldno]
        return [known for known in field_values
                if typer.is_unknown(known) is False]
Пример #6
0
def get_max(value_type, values):
    """ Returns the maximum value of the input.  Unknown values and values
        that cannot be converted to the requested numeric type are ignored.

        Inputs:
          - value_type - one of integer, float, string, timestamp,
            unknown or None
          - values - dictionary or list of string values
        Outputs:
          - the single maximum value: as a string for integer and float
            types, as found in the input for all other types.  None if no
            usable values were found.

        Issues:
          - doesn't report to caller number of values rejected due to
            unknown or invalid values
    """
    assert value_type in ['integer', 'float', 'string', 'timestamp',
                          'unknown', None]

    # numeric types are converted before comparison, everything else is
    # compared exactly as supplied
    convert = (int if value_type == 'integer'
               else float if value_type == 'float'
               else None)

    candidates = []
    for raw in values:
        if typer.is_unknown(raw):
            continue
        try:
            candidates.append(raw if convert is None else convert(raw))
        except ValueError:
            continue                       # invalid for this value_type

    try:
        largest = max(candidates)
        return largest if convert is None else str(largest)
    except ValueError:
        # raised by max() when there is nothing to compare
        return None
Пример #7
0
def get_max(value_type, values):
    """ Returns the largest known value of the input.

        Inputs:
          - value_type - one of integer, float, string, timestamp,
            unknown or None
          - values - dictionary or list of string values
        Outputs:
          - the single maximum value - str() of the converted number for
            integer and float types, otherwise the value as supplied.
            None if every value was unknown or invalid, or the input was
            empty.

        Issues:
          - doesn't report to caller number of values rejected due to
            unknown or invalid values
    """
    assert (value_type
            in ['integer', 'float', 'string', 'timestamp', 'unknown', None])
    is_numeric = value_type in ['integer', 'float']

    usable = []
    for item in values:
        if typer.is_unknown(item):
            continue
        try:
            if value_type == 'integer':
                item = int(item)
            elif value_type == 'float':
                item = float(item)
        except ValueError:
            continue                       # skip values that fail conversion
        usable.append(item)

    try:
        top = max(usable)
        if is_numeric:
            return str(top)
        return top
    except ValueError:
        # nothing usable to compare
        return None
Пример #8
0
def get_min(value_type, values):
    """ Returns the minimum value of the input.  Unknown values and values
        that cannot be converted to the requested numeric type are ignored.

        Inputs:
          - value_type - one of integer, float, string, timestamp,
            unknown or None
          - values - dictionary or list of string values
        Outputs:
          - the single minimum value: as a string for integer and float
            types, as found in the input for all other types.  None if no
            usable values were found.

        Issues:
          - doesn't report to caller number of values rejected due to
            unknown or invalid values
    """
    assert value_type in ["integer", "float", "string", "timestamp", "unknown", None]

    # first handle types & unknowns: numeric types get converted, all
    # other types are compared exactly as supplied
    if value_type == "integer":
        convert = int
    elif value_type == "float":
        convert = float
    else:
        convert = None

    candidates = []
    for raw in values:
        if typer.is_unknown(raw):
            continue
        try:
            candidates.append(raw if convert is None else convert(raw))
        except ValueError:
            continue  # invalid for this value_type

    # next return the minimum value
    try:
        smallest = min(candidates)
        return smallest if convert is None else str(smallest)
    except ValueError:
        # raised by min() when there is nothing to compare
        return None
Пример #9
0
def get_case(field_type, values):
    """ Determines the case of a list or dictionary of values.
        Args:
          - field_type:  if not == 'string', will return 'n/a'
          - values:      could be either dictionary or list.  If it's a
                         dictionary, then only its keys are examined.
        Returns:
          - one of:  'mixed','lower','upper','unknown' - or 'n/a' for
            non-string field types
        Misc notes:
          - unknown values and numbers are ignored
          - empty values list/dict results in 'unknown' result
        To do:
          - add consistency factor
    """
    if field_type != 'string':
        return 'n/a'

    # note which categories of value occur at least once:
    seen = set()
    for key in values:
        if typer.is_unknown(key):
            seen.add('unk')
        elif typer.is_integer(key) or typer.is_float(key):
            seen.add('number')              # will be ignoring these
        elif key.islower():
            seen.add('lower')
        elif key.isupper():
            seen.add('upper')
        else:
            seen.add('mixed')

    # evaluate what was seen:
    has_lower = 'lower' in seen
    has_upper = 'upper' in seen
    if 'mixed' in seen or (has_lower and has_upper):
        return 'mixed'
    if has_lower:
        return 'lower'
    if has_upper:
        return 'upper'
    return 'unknown'
Пример #10
0
def get_min_length(values):
    """ Returns the length of the shortest known value of the input.

        Inputs:
          - values - dictionary or list of string values; for a
            dictionary only the keys are examined.
        Outputs:
          - the single minimum length, or None if the input is empty or
            holds nothing besides unknown values.
    """
    known_lengths = [len(value) for value in values
                     if not typer.is_unknown(value)]

    # Report "no answer" as None, as documented, rather than leaking an
    # arbitrary sentinel length to the caller.
    if not known_lengths:
        return None
    return min(known_lengths)
Пример #11
0
def get_min_length(values):
    """ Returns the minimum length value of the input.   If
        no values found besides unknown it will just return 'None'

        Inputs:
          - values - dictionary or list of string values; for a
            dictionary only the keys are examined.
        Outputs:
          - the single minimum length, or None when there was nothing
            to measure
    """
    # None means "no known value measured yet"; it is also the result for
    # empty or all-unknown input, so no sentinel length can leak out.
    shortest = None

    for value in values:
        if typer.is_unknown(value):
            continue
        length = len(value)
        if shortest is None or length < shortest:
            shortest = length

    return shortest
Пример #12
0
def get_case(field_type, values):
    """ Determines the case of a list or dictionary of values.
        Args:
          - field_type:  if not == 'string', will return 'n/a'
          - values:      could be either dictionary or list.  If it's a
                         dictionary, then only its keys are examined.
        Returns:
          - 'n/a' for non-string field types
          - otherwise intended to be one of:  'mixed','lower','upper',
            'unknown' - but see the note at the end of the function
        Misc notes:
          - unknown values and numbers are counted separately from the
            lower / upper / mixed categories
          - empty values list/dict is documented to give an 'unknown'
            result (the code producing it is not visible here)
        To do:
          - add consistency factor
    """
    freq = collections.defaultdict(int)
    case = None

    if field_type != 'string':
        return 'n/a'

    # count occurrences of each case category in values:
    for key in values:
        if typer.is_unknown(key):
            freq['unk'] += 1
        elif typer.is_integer(key):  # will be ignoring these
            freq['number'] += 1
        elif typer.is_float(key):  # will be ignoring these
            freq['number'] += 1
        elif key.islower():
            freq['lower'] += 1
        elif key.isupper():
            freq['upper'] += 1
        else:
            freq['mixed'] += 1

    # evaluate frequency distribution:
    if 'mixed' in freq:
        case = 'mixed'
    # NOTE(review): the visible code stops here - the remaining
    # lower / upper / unknown branches and the 'return case' statement
    # are not shown, so as written string fields return None.
    # TODO confirm against the complete source.
Пример #13
0
 def test_type_d01_is_unknown(self):
     """ is_unknown() must return exactly True for blank and
         'na'-style placeholders, and exactly False for anything else.
     """
     placeholders = ('', ' ', 'na', 'NA', 'n/a', 'N/A', 'unk', 'unknown')
     real_values = ('b', '$3', '4,333.22', '33.22', '3', '-3', 3, 3.3, None)

     for candidate in placeholders:
         assert mod.is_unknown(candidate) is True
     for candidate in real_values:
         assert mod.is_unknown(candidate) is False
Пример #14
0
 def test_type_d01_is_unknown(self):
     """ Checks the exact boolean returned by is_unknown(): True for
         empty, blank and 'not available' markers, False otherwise.
     """
     expectations = [
         (True, ['', ' ', 'na', 'NA', 'n/a', 'N/A', 'unk', 'unknown']),
         (False, ['b', '$3', '4,333.22', '33.22', '3', '-3', 3, 3.3, None]),
     ]
     for expected, samples in expectations:
         for sample in samples:
             assert (mod.is_unknown(sample) is expected)
Пример #15
0
 def test_is_unknown_basics(self):
     """ Blank and 'na'-style placeholders must be reported as unknown
         (any truthy result); everything else must give exactly False.
     """
     placeholders = ('', ' ', 'na', 'NA', 'n/a', 'N/A', 'unk', 'unknown')
     real_values = ('b', '$3', '4,333.22', '33.22', '3', '-3', 3, 3.3, None)

     for candidate in placeholders:
         assert mod.is_unknown(candidate)
     for candidate in real_values:
         assert mod.is_unknown(candidate) is False
Пример #16
0
 def test_is_unknown_basics(self):
     """ is_unknown() should be truthy for empty, blank and
         'not available' markers, and exactly False for real values -
         including numbers and None.
     """
     for marker in ["", " ", "na", "NA", "n/a", "N/A", "unk", "unknown"]:
         assert mod.is_unknown(marker)

     for real in ["b", "$3", "4,333.22", "33.22", "3", "-3", 3, 3.3, None]:
         assert mod.is_unknown(real) is False