Пример #1
0
    def test_math_single_string_occurring_once(self):
        """A lone string-keyed entry must yield zero variance and stddev.

        The result is checked both with an explicitly supplied mean (2)
        and with the mean left for the function to work out itself.

        The original body repeated this exact stanza twice; the verbatim
        copy added no coverage and has been dropped.
        """
        # NOTE(review): the name says "occurring once" but the count in the
        # pair is 2 - confirm which of the two was intended.
        freqs = [('2', 2)]
        expected = (0.0, 0.0)
        assert mod.get_variance_and_stddev(freqs, 2) == expected
        assert mod.get_variance_and_stddev(freqs) == expected
Пример #2
0
    def test_math_single_string_occurring_once(self):
        """A lone string-keyed entry must yield zero variance and stddev.

        The result is checked both with an explicitly supplied mean (2)
        and with the mean left for the function to work out itself.

        The original body repeated this exact stanza twice; the verbatim
        copy added no coverage and has been dropped.
        """
        # NOTE(review): the name says "occurring once" but the count in the
        # pair is 2 - confirm which of the two was intended.
        freqs = [('2', 2)]
        expected = (0.0, 0.0)
        assert mod.get_variance_and_stddev(freqs, 2) == expected
        assert mod.get_variance_and_stddev(freqs) == expected
Пример #3
0
    def test_math_identical_single_float_occuring_once(self):
        """A single float seen once has zero variance and zero stddev.

        Each case is run with an explicit mean and again without one.
        """
        no_spread = (0.0, 0.0)
        # (value, mean handed to the function) for each scenario
        for value, mean in ((2.0, 2), (2.5, 2.5)):
            freqs = [(value, 1)]
            assert mod.get_variance_and_stddev(freqs, mean) == no_spread
            assert mod.get_variance_and_stddev(freqs) == no_spread
Пример #4
0
    def test_math_identical_single_float_occuring_once(self):
        """A single float seen once has zero variance and zero stddev.

        Each case is run with an explicit mean and again without one.
        """
        no_spread = (0.0, 0.0)
        # (value, mean handed to the function) for each scenario
        for value, mean in ((2.0, 2), (2.5, 2.5)):
            freqs = [(value, 1)]
            assert mod.get_variance_and_stddev(freqs, mean) == no_spread
            assert mod.get_variance_and_stddev(freqs) == no_spread
Пример #5
0
 def test_math_floats_multiple_floats_occuring_once(self):
     """Three distinct floats, each seen once, give ('0.67', '0.82').

     The population variance of 2, 3, 4 is 2/3 and its square root is
     about 0.82; self._convert_float presumably renders each number as a
     two-decimal string - confirm against that helper.
     """
     freqs = [(2.0, 1), (3.0, 1), (4.0, 1)]
     variance, std_dev = mod.get_variance_and_stddev(freqs)
     actual = (self._convert_float(variance),
               self._convert_float(std_dev))
     assert actual == ('0.67', '0.82')
Пример #6
0
    def test_math_multiple_numbers_occuring_once(self):
        """Several distinct integers, each with a count of one.

        Results are passed through self._convert_float before comparison,
        so the expectations are strings.
        """
        cases = (
            ([(2, 1), (3, 1), (4, 1)],
             ('0.67', '0.82')),
            ([(2, 1), (3, 1), (9, 1), (12, 1), (13, 1), (15, 1),
              (17, 1), (19, 1), (22, 1), (23, 1), (25, 1)],
             ('53.88', '7.34')),
        )
        for freqs, expected in cases:
            variance, std_dev = mod.get_variance_and_stddev(freqs)
            actual = (self._convert_float(variance),
                      self._convert_float(std_dev))
            assert actual == expected
Пример #7
0
    def test_math_multiple_numbers_occuring_once(self):
        """Several distinct integers, each with a count of one.

        Results are passed through self._convert_float before comparison,
        so the expectations are strings.
        """
        cases = (
            ([(2, 1), (3, 1), (4, 1)],
             ('0.67', '0.82')),
            ([(2, 1), (3, 1), (9, 1), (12, 1), (13, 1), (15, 1),
              (17, 1), (19, 1), (22, 1), (23, 1), (25, 1)],
             ('53.88', '7.34')),
        )
        for freqs, expected in cases:
            variance, std_dev = mod.get_variance_and_stddev(freqs)
            actual = (self._convert_float(variance),
                      self._convert_float(std_dev))
            assert actual == expected
Пример #8
0
 def test_math_floats_multiple_floats_occuring_once(self):
     """Three distinct floats, each seen once, give ('0.67', '0.82').

     The population variance of 2, 3, 4 is 2/3 and its square root is
     about 0.82; self._convert_float presumably renders each number as a
     two-decimal string - confirm against that helper.
     """
     freqs = [(2.0, 1), (3.0, 1), (4.0, 1)]
     variance, std_dev = mod.get_variance_and_stddev(freqs)
     actual = (self._convert_float(variance),
               self._convert_float(std_dev))
     assert actual == ('0.67', '0.82')
Пример #9
0
 def test_math_multiple_numbers_occurring_multiple_times(self):
     """Integers with differing occurrence counts.

     Each pair is (value, count); the converted variance and stddev are
     expected to be '37.11' and '6.09'.
     """
     freqs = [(2, 10), (3, 15), (9, 10), (12, 7), (13, 4), (15, 2),
              (17, 1), (19, 1), (22, 1), (23, 1), (25, 1)]
     variance, std_dev = mod.get_variance_and_stddev(freqs)
     actual = (self._convert_float(variance),
               self._convert_float(std_dev))
     assert actual == ('37.11', '6.09')
Пример #10
0
 def test_math_multiple_numbers_occurring_multiple_times(self):
     """Integers with differing occurrence counts.

     Each pair is (value, count); the converted variance and stddev are
     expected to be '37.11' and '6.09'.
     """
     freqs = [(2, 10), (3, 15), (9, 10), (12, 7), (13, 4), (15, 2),
              (17, 1), (19, 1), (22, 1), (23, 1), (25, 1)]
     variance, std_dev = mod.get_variance_and_stddev(freqs)
     actual = (self._convert_float(variance),
               self._convert_float(std_dev))
     assert actual == ('37.11', '6.09')
Пример #11
0
    def analyze_fields(self,
                       field_number: Optional[int] = None,
                       field_types_overrides: Optional[Dict[int, str]] = None,
                       max_freq_number: Optional[int] = None,
                       read_limit: int = -1) -> None:
        """ Profiles fields: name, frequencies, type and summary statistics.

            Arguments:
               - field_number: zero-offset index of the one field to
                 analyze; None means every field is analyzed.
               - field_types_overrides: mapping of field index to a type
                 name that replaces the detected type.  All entries are
                 written into self.field_types on each analyzed field.
               - max_freq_number: cap on the size of the collected frequency
                 distribution.  When None a default applies:
                 MAX_FREQ_MULTI_COL_DEFAULT if all fields are analyzed,
                 otherwise MAX_FREQ_SINGLE_COL_DEFAULT.
               - read_limit: passed through to miscer.get_field_freq; the
                 default of -1 is documented as meaning 'no limit'.
            Returns:
               - Nothing directly - fills the per-field instance containers
                 (field_names, field_freqs, field_types, field_min, ...).
        """
        self.max_freq_number = max_freq_number

        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # Single-field mode: ignore every index but the requested one.
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_name(
                self.filename, self.dialect, f_no)

            # An explicit cap wins; otherwise the default depends on whether
            # one field or all fields are being analyzed.
            if max_freq_number is not None:
                max_items = max_freq_number
            elif field_number is None:
                max_items = MAX_FREQ_MULTI_COL_DEFAULT
            else:
                max_items = MAX_FREQ_SINGLE_COL_DEFAULT

            freqs, truncated, invalid_rows = miscer.get_field_freq(
                self.filename, self.dialect, f_no, max_items, read_limit)
            self.field_freqs[f_no] = freqs
            self.field_trunc[f_no] = truncated
            self.field_rows_invalid[f_no] = invalid_rows

            freq_items = list(freqs.items())

            self.field_types[f_no] = typer.get_field_type(freqs)
            if field_types_overrides:
                for col_no, forced_type in field_types_overrides.items():
                    self.field_types[col_no] = forced_type
            # Read the type back only after overrides have been applied.
            f_type = self.field_types[f_no]

            self.field_max[f_no] = miscer.get_max(f_type, freq_items)
            self.field_min[f_no] = miscer.get_min(f_type, freq_items)

            # Case and length metrics are only collected for strings.
            if f_type != 'string':
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None
            else:
                self.field_case[f_no] = miscer.get_case(f_type, freq_items)
                self.field_min_length[f_no] = miscer.get_min_length(freq_items)
                self.field_max_length[f_no] = miscer.get_max_length(freq_items)
                self.field_mean_length[f_no] = mather.get_mean_length(freq_items)

            # Mean, median, variance and stddev only for numeric types.
            if f_type not in ('integer', 'float'):
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None
            else:
                mean = mather.get_mean(freq_items)
                self.field_mean[f_no] = mean
                self.field_median[f_no] = mather.get_median(freq_items)
                variance, std_dev = mather.get_variance_and_stddev(freq_items,
                                                                   mean)
                self.variance[f_no] = variance
                self.stddev[f_no] = std_dev
Пример #12
0
 def test_math_empty_list(self):
     """An empty frequency list is expected to produce (None, None)."""
     result = mod.get_variance_and_stddev([])
     assert result == (None, None)
Пример #13
0
 def test_math_ignoring_bad_value(self):
     """A pair whose count is the non-numeric 'foo' must not affect the result.

     The expected outcome is (0.0, 0.0), with and without an explicit mean.
     """
     freqs = [(2, 2), (3, 'foo')]
     expected = (0.0, 0.0)
     # First with the mean supplied, then letting the function derive it.
     for call_args in ((freqs, 2), (freqs,)):
         assert mod.get_variance_and_stddev(*call_args) == expected
Пример #14
0
 def test_math_ignoring_bad_key(self):
     """A pair whose key is the non-numeric 'bar' must not affect the result.

     The expected outcome is (0.0, 0.0), with and without an explicit mean.
     """
     freqs = [(2, 2), ('bar', 2)]
     expected = (0.0, 0.0)
     # First with the mean supplied, then letting the function derive it.
     for call_args in ((freqs, 2), (freqs,)):
         assert mod.get_variance_and_stddev(*call_args) == expected
Пример #15
0
 def test_math_single_float_occurring_multiple_times(self):
     """One float value repeated twice has zero variance and stddev.

     Checked with the mean passed in (2.5) and with it omitted.
     """
     freqs = [(2.5, 2)]
     expected = (0.0, 0.0)
     for call_args in ((freqs, 2.5), (freqs,)):
         assert mod.get_variance_and_stddev(*call_args) == expected
Пример #16
0
 def test_math_single_float_occurring_multiple_times(self):
     """One float value repeated twice has zero variance and stddev.

     Checked with the mean passed in (2.5) and with it omitted.
     """
     freqs = [(2.5, 2)]
     expected = (0.0, 0.0)
     for call_args in ((freqs, 2.5), (freqs,)):
         assert mod.get_variance_and_stddev(*call_args) == expected
Пример #17
0
 def test_math_ignoring_bad_key(self):
     """A pair whose key is the non-numeric 'bar' must not affect the result.

     The expected outcome is (0.0, 0.0), with and without an explicit mean.
     """
     freqs = [(2, 2), ('bar', 2)]
     expected = (0.0, 0.0)
     # First with the mean supplied, then letting the function derive it.
     for call_args in ((freqs, 2), (freqs,)):
         assert mod.get_variance_and_stddev(*call_args) == expected
Пример #18
0
    def analyze_fields(self,
                       field_number: Optional[int] = None,
                       field_types_overrides: Optional[Dict[int, str]] = None,
                       max_freq_number: Optional[int] = None,
                       read_limit: int = -1) -> None:
        """ Profiles fields: name, frequencies, type and summary statistics.

            Arguments:
               - field_number: zero-offset index of the one field to
                 analyze; None means every field is analyzed.
               - field_types_overrides: mapping of field index to a type
                 name that replaces the detected type.  All entries are
                 written into self.field_types on each analyzed field.
               - max_freq_number: cap on the size of the collected frequency
                 distribution.  When None a default applies:
                 MAX_FREQ_MULTI_COL_DEFAULT if all fields are analyzed,
                 otherwise MAX_FREQ_SINGLE_COL_DEFAULT.
               - read_limit: passed through to miscer.get_field_freq; the
                 default of -1 is documented as meaning 'no limit'.
            Returns:
               - Nothing directly - fills the per-field instance containers
                 (field_names, field_freqs, field_types, field_min, ...).
        """
        self.max_freq_number = max_freq_number

        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # Single-field mode: ignore every index but the requested one.
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_name(
                self.filename, self.dialect, f_no)

            # An explicit cap wins; otherwise the default depends on whether
            # one field or all fields are being analyzed.
            if max_freq_number is not None:
                max_items = max_freq_number
            elif field_number is None:
                max_items = MAX_FREQ_MULTI_COL_DEFAULT
            else:
                max_items = MAX_FREQ_SINGLE_COL_DEFAULT

            freqs, truncated, invalid_rows = miscer.get_field_freq(
                self.filename, self.dialect, f_no, max_items, read_limit)
            self.field_freqs[f_no] = freqs
            self.field_trunc[f_no] = truncated
            self.field_rows_invalid[f_no] = invalid_rows

            freq_items = list(freqs.items())

            self.field_types[f_no] = typer.get_field_type(freqs)
            if field_types_overrides:
                for col_no, forced_type in field_types_overrides.items():
                    self.field_types[col_no] = forced_type
            # Read the type back only after overrides have been applied.
            f_type = self.field_types[f_no]

            self.field_max[f_no] = miscer.get_max(f_type, freq_items)
            self.field_min[f_no] = miscer.get_min(f_type, freq_items)

            # Case and length metrics are only collected for strings.
            if f_type != 'string':
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None
            else:
                self.field_case[f_no] = miscer.get_case(f_type, freq_items)
                self.field_min_length[f_no] = miscer.get_min_length(freq_items)
                self.field_max_length[f_no] = miscer.get_max_length(freq_items)
                self.field_mean_length[f_no] = mather.get_mean_length(freq_items)

            # Mean, median, variance and stddev only for numeric types.
            if f_type not in ('integer', 'float'):
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None
            else:
                mean = mather.get_mean(freq_items)
                self.field_mean[f_no] = mean
                self.field_median[f_no] = mather.get_median(freq_items)
                variance, std_dev = mather.get_variance_and_stddev(freq_items,
                                                                   mean)
                self.variance[f_no] = variance
                self.stddev[f_no] = std_dev
Пример #19
0
 def test_math_empty_list(self):
     """An empty frequency list is expected to produce (None, None)."""
     result = mod.get_variance_and_stddev([])
     assert result == (None, None)
Пример #20
0
 def test_math_ignoring_bad_value(self):
     """A pair whose count is the non-numeric 'foo' must not affect the result.

     The expected outcome is (0.0, 0.0), with and without an explicit mean.
     """
     freqs = [(2, 2), (3, 'foo')]
     expected = (0.0, 0.0)
     # First with the mean supplied, then letting the function derive it.
     for call_args in ((freqs, 2), (freqs,)):
         assert mod.get_variance_and_stddev(*call_args) == expected