Exemplo n.º 1
0
    def test_misc_a01(self):
        """ Checks mod.get_case() results for the 'string' type against the
            expected case label of each fixture attribute on this test
            instance.
        """
        # (fixture attribute name, expected get_case result), checked in order.
        # Fixtures are looked up one at a time so a failure surfaces at the
        # first offending entry.
        # NOTE(review): test_m1 is expected to be 'lower' while test_m2 is
        # 'mixed' - presumably intentional; confirm against the fixtures.
        expected_by_fixture = (
            ('test_u1', 'upper'),
            ('test_u2', 'upper'),
            ('test_m1', 'lower'),
            ('test_m2', 'mixed'),
            ('test_unk1', 'unknown'),
            ('test_unk2', 'unknown'),
        )
        for fixture_name, expected in expected_by_fixture:
            assert mod.get_case('string', getattr(self, fixture_name)) == expected
Exemplo n.º 2
0
    def test_misc_a01(self):
        """ Verifies the case label mod.get_case() reports for each fixture
            when the field type is 'string'.
        """
        def case_of(fixture):
            # Every check in this test uses the 'string' field type.
            return mod.get_case('string', fixture)

        # fixtures expected to be reported as upper case
        assert case_of(self.test_u1) == 'upper'
        assert case_of(self.test_u2) == 'upper'

        # fixtures expected to be reported as lower / mixed case
        assert case_of(self.test_m1) == 'lower'
        assert case_of(self.test_m2) == 'mixed'

        # fixtures whose case is expected to be reported as unknown
        assert case_of(self.test_unk1) == 'unknown'
        assert case_of(self.test_unk2) == 'unknown'
Exemplo n.º 3
0
    def analyze_fields(self,
                       field_number=None,
                       field_types_overrides=None,
                       max_freq_number=None):
        """ Determines types, names, and characteristics of fields.

            Inputs:
               - field_number - if None, then analyzes all fields, otherwise
                 analyzes just the single field (based on zero-offset)
               - field_types_overrides - optional mapping indexed by field
                 number; each entry replaces the detected type stored in
                 self.field_types for that field number
               - max_freq_number - limit handed to miscer.get_field_freq();
                 if None, MAX_FREQ_MULTI_COL_DEFAULT is used when analyzing
                 all fields and MAX_FREQ_SINGLE_COL_DEFAULT when analyzing
                 a single field
            Outputs:
               - populates public class structures, each indexed by field
                 number: field_names, field_freqs, field_trunc,
                 field_rows_invalid, field_types, field_max, field_min,
                 field_case, field_min_length, field_max_length,
                 field_mean_length, field_mean, field_median, variance
                 and stddev
               - stores max_freq_number on self.max_freq_number
        """
        self.max_freq_number = max_freq_number

        # The frequency limit does not depend on the field being processed,
        # so it is resolved once rather than on every loop iteration.
        if max_freq_number is not None:
            max_items = max_freq_number
        elif field_number is None:
            max_items = MAX_FREQ_MULTI_COL_DEFAULT
        else:
            max_items = MAX_FREQ_SINGLE_COL_DEFAULT

        # Single-argument print(...) emits the same text under Python 2's
        # print statement and Python 3's print function.
        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # optional analysis of a single field
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_names(self.filename,
                                                            self.dialect,
                                                            f_no)

            (self.field_freqs[f_no],
             self.field_trunc[f_no],
             self.field_rows_invalid[f_no]) = miscer.get_field_freq(
                 self.filename, self.dialect, f_no, max_items)

            self.field_types[f_no] = typer.get_field_type(
                self.field_freqs[f_no])
            # Every override is re-applied on each pass (not only the one for
            # f_no), so that the type used below already reflects an override
            # for the current field.  Kept as-is to preserve side effects on
            # the other entries of self.field_types.
            if field_types_overrides:
                for col_no in field_types_overrides:
                    self.field_types[col_no] = field_types_overrides[col_no]

            self.field_max[f_no] = miscer.get_max(self.field_types[f_no],
                                                  self.field_freqs[f_no])
            self.field_min[f_no] = miscer.get_min(self.field_types[f_no],
                                                  self.field_freqs[f_no])

            # Case and length characteristics are only computed for strings.
            if self.field_types[f_no] == 'string':
                self.field_case[f_no] = miscer.get_case(
                    self.field_types[f_no], self.field_freqs[f_no])
                self.field_min_length[f_no] = miscer.get_min_length(
                    self.field_freqs[f_no])
                self.field_max_length[f_no] = miscer.get_max_length(
                    self.field_freqs[f_no])
                self.field_mean_length[f_no] = mather.get_mean_length(
                    self.field_freqs[f_no])
            else:
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None

            # Numeric statistics are only computed for integers and floats.
            if self.field_types[f_no] in ('integer', 'float'):
                self.field_mean[f_no] = mather.get_mean(self.field_freqs[f_no])
                self.field_median[f_no] = mather.GetDictMedian().run(
                    self.field_freqs[f_no])
                (self.variance[f_no],
                 self.stddev[f_no]) = mather.get_variance_and_stddev(
                     self.field_freqs[f_no], self.field_mean[f_no])
            else:
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None
Exemplo n.º 4
0
    def analyze_fields(self,
                       field_number=None,
                       field_types_overrides=None,
                       max_freq_number=None):
        """ Determines types, names, and characteristics of fields.

            Inputs:
               - field_number - if None, then analyzes all fields, otherwise
                 analyzes just the single field (based on zero-offset)
               - field_types_overrides - optional mapping indexed by field
                 number; each entry replaces the detected type stored in
                 self.field_types for that field number
               - max_freq_number - limit handed to miscer.get_field_freq();
                 if None, MAX_FREQ_MULTI_COL_DEFAULT is used when analyzing
                 all fields and MAX_FREQ_SINGLE_COL_DEFAULT when analyzing
                 a single field
            Outputs:
               - populates public class structures, each indexed by field
                 number: field_names, field_freqs, field_trunc,
                 field_rows_invalid, field_types, field_max, field_min,
                 field_case, field_min_length, field_max_length,
                 field_mean_length, field_mean, field_median, variance
                 and stddev
               - stores max_freq_number on self.max_freq_number
        """
        self.max_freq_number = max_freq_number

        # The frequency limit is the same for every field, so it is resolved
        # once here instead of inside the loop.
        if max_freq_number is not None:
            max_items = max_freq_number
        elif field_number is None:
            max_items = MAX_FREQ_MULTI_COL_DEFAULT
        else:
            max_items = MAX_FREQ_SINGLE_COL_DEFAULT

        # Single-argument print(...) emits the same text under Python 2's
        # print statement and Python 3's print function.
        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # optional analysis of a single field
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_names(
                self.filename, self.dialect, f_no)

            (self.field_freqs[f_no], self.field_trunc[f_no],
             self.field_rows_invalid[f_no]) = miscer.get_field_freq(
                 self.filename, self.dialect, f_no, max_items)
            freqs = self.field_freqs[f_no]

            self.field_types[f_no] = typer.get_field_type(freqs)
            # Every override is re-applied on each pass (not only the one for
            # f_no), so that the type read below already reflects an override
            # for the current field.  Kept as-is to preserve side effects on
            # the other entries of self.field_types.
            if field_types_overrides:
                for col_no in field_types_overrides:
                    self.field_types[col_no] = field_types_overrides[col_no]
            f_type = self.field_types[f_no]

            self.field_max[f_no] = miscer.get_max(f_type, freqs)
            self.field_min[f_no] = miscer.get_min(f_type, freqs)

            # Case and length characteristics are only computed for strings.
            if f_type == 'string':
                self.field_case[f_no] = miscer.get_case(f_type, freqs)
                self.field_min_length[f_no] = miscer.get_min_length(freqs)
                self.field_max_length[f_no] = miscer.get_max_length(freqs)
                self.field_mean_length[f_no] = mather.get_mean_length(freqs)
            else:
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None

            # Numeric statistics are only computed for integers and floats.
            if f_type in ('integer', 'float'):
                self.field_mean[f_no] = mather.get_mean(freqs)
                self.field_median[f_no] = mather.GetDictMedian().run(freqs)
                (self.variance[f_no],
                 self.stddev[f_no]) = mather.get_variance_and_stddev(
                     freqs, self.field_mean[f_no])
            else:
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None