def analyze_fields(self,
                   field_number=None,
                   field_types_overrides=None,
                   max_freq_number=None):
        """ Determines types, names, and characteristics of fields.

            Inputs:
               - field_number - if None, then analyzes all fields, otherwise
                 analyzes just the single field (based on zero-offset)
               - field_types_overrides - optional mapping of field number to
                 a type name; each entry replaces the detected type in
                 self.field_types
               - max_freq_number - passed to miscer.get_field_freq as its
                 item limit (presumably the cap on distinct values kept -
                 confirm against miscer).  If None, the limit is
                 MAX_FREQ_MULTI_COL_DEFAULT when analyzing all fields and
                 MAX_FREQ_SINGLE_COL_DEFAULT when analyzing a single field.
            Outputs:
               - populates public class structures, keyed by field number:
                 field_names, field_freqs, field_trunc, field_rows_invalid,
                 field_types, field_max, field_min, field_case,
                 field_min_length, field_max_length, field_mean_length,
                 field_mean, field_median, variance, stddev
               - stores max_freq_number on self.max_freq_number
        """
        self.max_freq_number = max_freq_number

        # The frequency limit does not depend on the field being analyzed,
        # so it is resolved once rather than on every pass of the loop.
        if max_freq_number is not None:
            max_items = max_freq_number
        elif field_number is None:
            max_items = MAX_FREQ_MULTI_COL_DEFAULT
        else:
            max_items = MAX_FREQ_SINGLE_COL_DEFAULT

        # Single-argument parenthesised print behaves identically on
        # Python 2 and Python 3.
        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # optional analysis of a single field
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_names(self.filename,
                                                            self.dialect,
                                                            f_no)

            (self.field_freqs[f_no],
             self.field_trunc[f_no],
             self.field_rows_invalid[f_no]) = miscer.get_field_freq(
                 self.filename, self.dialect, f_no, max_items)

            freqs = self.field_freqs[f_no]

            self.field_types[f_no] = typer.get_field_type(freqs)
            if field_types_overrides:
                # Every override is written on each pass, not only the one
                # for f_no, so overridden columns carry their forced type
                # even when they are never analyzed here (single-field mode).
                for col_no in field_types_overrides:
                    self.field_types[col_no] = field_types_overrides[col_no]

            # Read the type back only after overrides have been applied.
            f_type = self.field_types[f_no]

            self.field_max[f_no] = miscer.get_max(f_type, freqs)
            self.field_min[f_no] = miscer.get_min(f_type, freqs)

            # Case and length characteristics are only kept for strings.
            if f_type == 'string':
                self.field_case[f_no] = miscer.get_case(f_type, freqs)
                self.field_min_length[f_no] = miscer.get_min_length(freqs)
                self.field_max_length[f_no] = miscer.get_max_length(freqs)
                self.field_mean_length[f_no] = mather.get_mean_length(freqs)
            else:
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None

            # Numeric statistics are only kept for integers and floats.
            if f_type in ('integer', 'float'):
                self.field_mean[f_no] = mather.get_mean(freqs)
                self.field_median[f_no] = mather.GetDictMedian().run(freqs)
                (self.variance[f_no],
                 self.stddev[f_no]) = mather.get_variance_and_stddev(
                     freqs, self.field_mean[f_no])
            else:
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None
示例#2
0
 def test_math_g05_medium_sets(self):
     """ get_mean_length of self.med_dict_1 must equal 1.5. """
     mean_length = mod.get_mean_length(self.med_dict_1)
     assert mean_length == 1.5
    def analyze_fields(self,
                       field_number=None,
                       field_types_overrides=None,
                       max_freq_number=None):
        """ Determines types, names, and characteristics of fields.

            Inputs:
               - field_number - if None, then analyzes all fields, otherwise
                 analyzes just the single field (based on zero-offset)
               - field_types_overrides - optional mapping of field number to
                 a type name; each entry replaces the detected type in
                 self.field_types
               - max_freq_number - passed to miscer.get_field_freq as its
                 item limit (presumably the cap on distinct values kept -
                 confirm against miscer).  If None, the limit is
                 MAX_FREQ_MULTI_COL_DEFAULT when analyzing all fields and
                 MAX_FREQ_SINGLE_COL_DEFAULT when analyzing a single field.
            Outputs:
               - populates public class structures, keyed by field number:
                 field_names, field_freqs, field_trunc, field_rows_invalid,
                 field_types, field_max, field_min, field_case,
                 field_min_length, field_max_length, field_mean_length,
                 field_mean, field_median, variance, stddev
               - stores max_freq_number on self.max_freq_number
        """
        self.max_freq_number = max_freq_number

        # The frequency limit does not depend on the field being analyzed,
        # so it is resolved once rather than on every pass of the loop.
        if max_freq_number is not None:
            max_items = max_freq_number
        elif field_number is None:
            max_items = MAX_FREQ_MULTI_COL_DEFAULT
        else:
            max_items = MAX_FREQ_SINGLE_COL_DEFAULT

        # Single-argument parenthesised print behaves identically on
        # Python 2 and Python 3.
        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # optional analysis of a single field
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_names(
                self.filename, self.dialect, f_no)

            (self.field_freqs[f_no], self.field_trunc[f_no],
             self.field_rows_invalid[f_no]) = miscer.get_field_freq(
                 self.filename, self.dialect, f_no, max_items)

            freqs = self.field_freqs[f_no]

            self.field_types[f_no] = typer.get_field_type(freqs)
            if field_types_overrides:
                # Every override is written on each pass, not only the one
                # for f_no, so overridden columns carry their forced type
                # even when they are never analyzed here (single-field mode).
                for col_no in field_types_overrides:
                    self.field_types[col_no] = field_types_overrides[col_no]

            # Read the type back only after overrides have been applied.
            f_type = self.field_types[f_no]

            self.field_max[f_no] = miscer.get_max(f_type, freqs)
            self.field_min[f_no] = miscer.get_min(f_type, freqs)

            # Case and length characteristics are only kept for strings.
            if f_type == 'string':
                self.field_case[f_no] = miscer.get_case(f_type, freqs)
                self.field_min_length[f_no] = miscer.get_min_length(freqs)
                self.field_max_length[f_no] = miscer.get_max_length(freqs)
                self.field_mean_length[f_no] = mather.get_mean_length(freqs)
            else:
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None

            # Numeric statistics are only kept for integers and floats.
            if f_type in ('integer', 'float'):
                self.field_mean[f_no] = mather.get_mean(freqs)
                self.field_median[f_no] = mather.GetDictMedian().run(freqs)
                (self.variance[f_no],
                 self.stddev[f_no]) = mather.get_variance_and_stddev(
                     freqs, self.field_mean[f_no])
            else:
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None
示例#4
0
 def test_math_g02_unknowns(self):
     """ get_mean_length of self.unk_dict must equal 1. """
     mean_length = mod.get_mean_length(self.unk_dict)
     assert mean_length == 1
示例#5
0
 def test_math_g01_emptiness(self):
     """ get_mean_length of self.empty_dict_1 must be None. """
     mean_length = mod.get_mean_length(self.empty_dict_1)
     assert mean_length is None
示例#6
0
 def test_math_g05_medium_sets(self):
     """ Checks that get_mean_length returns 1.5 for self.med_dict_1. """
     expected = 1.5
     actual = mod.get_mean_length(self.med_dict_1)
     assert actual == expected
示例#7
0
 def test_math_g02_unknowns(self):
     """ Checks that get_mean_length returns 1 for self.unk_dict. """
     expected = 1
     actual = mod.get_mean_length(self.unk_dict)
     assert actual == expected
示例#8
0
 def test_math_g01_emptiness(self):
     """ Checks that get_mean_length returns None for self.empty_dict_1. """
     actual = mod.get_mean_length(self.empty_dict_1)
     assert actual is None