示例#1
0
    def test_misc_c03_empty(self):
        """ get_field_names must return None for the empty file, both when
            asked for all columns and when asked for column 1, before and
            after has_header is switched off on the dialect.
        """
        # first pass: dialect as the fixture left it (the header case)
        all_cols = mod.get_field_names(self.empty_fqfn, self.dialect)
        assert all_cols is None
        one_col = mod.get_field_names(self.empty_fqfn, self.dialect, 1)
        assert one_col is None

        # second pass: header explicitly disabled
        self.dialect.has_header = False
        all_cols = mod.get_field_names(self.empty_fqfn, self.dialect)
        assert all_cols is None
        one_col = mod.get_field_names(self.empty_fqfn, self.dialect, 1)
        assert one_col is None
示例#2
0
    def test_misc_c03_empty(self):
        """ get_field_names must return None for the empty file, both when
            asked for all columns and when asked for column 1, before and
            after has_header is switched off on the dialect.

            None is checked by identity ("is None") rather than equality,
            so an object that merely compares equal to None cannot satisfy
            the assertion.
        """
        # test with header:
        assert mod.get_field_names(self.empty_fqfn, self.dialect) is None
        assert mod.get_field_names(self.empty_fqfn, self.dialect, 1) is None

        # test without header
        self.dialect.has_header = False
        assert mod.get_field_names(self.empty_fqfn, self.dialect) is None
        assert mod.get_field_names(self.empty_fqfn, self.dialect, 1) is None
示例#3
0
    def test_misc_c04_noquote(self):
        """ For the no-quote file, the full name list must equal
            self.name_list and column 1 must be named 'phone'.
        """
        all_names = mod.get_field_names(self.noquote_fqfn, self.dialect)
        assert all_names == self.name_list

        second_name = mod.get_field_names(self.noquote_fqfn, self.dialect, 1)
        assert second_name == 'phone'
示例#4
0
 def test_misc_c02_headless_one_col(self):
     """ With has_header switched off, column 1 of the headless file must
         be named 'field_1'.
     """
     self.dialect.has_header = False
     col_name = mod.get_field_names(self.headless_fqfn, self.dialect, 1)
     assert col_name == 'field_1'
示例#5
0
 def test_misc_c02_headless_all_col(self):
     """ With has_header switched off, the headless file must yield the
         four names field_0 through field_3.
     """
     self.dialect.has_header = False
     col_names = mod.get_field_names(self.headless_fqfn, self.dialect)
     expected = ['field_0', 'field_1', 'field_2', 'field_3']
     assert col_names == expected
示例#6
0
    def test_misc_c01_header_one_col(self):
        """ Column 1 of the header file must be reported as 'phone'. """
        col_name = mod.get_field_names(self.header_fqfn, self.dialect, 1)
        assert col_name == 'phone'
示例#7
0
    def test_misc_c01_header_all_cols(self):
        """ All names read from the header file must equal self.name_list. """
        col_names = mod.get_field_names(self.header_fqfn, self.dialect)
        assert col_names == self.name_list
示例#8
0
    def test_misc_c04_noquote(self):
        """ For the no-quote file, the full name list must equal
            self.name_list and column 1 must be named 'phone'.
        """
        # all columns first, then the single column, as separate lookups
        every_name = mod.get_field_names(self.noquote_fqfn, self.dialect)
        assert every_name == self.name_list

        single_name = mod.get_field_names(self.noquote_fqfn, self.dialect, 1)
        assert single_name == 'phone'
示例#9
0
 def test_misc_c02_headless_one_col(self):
     """ With has_header switched off, column 1 of the headless file must
         be named 'field_1'.
     """
     self.dialect.has_header = False
     found = mod.get_field_names(self.headless_fqfn, self.dialect, 1)
     assert found == 'field_1'
示例#10
0
 def test_misc_c02_headless_all_col(self):
     """ With has_header switched off, the headless file must yield the
         four names field_0 through field_3.
     """
     self.dialect.has_header = False
     found = mod.get_field_names(self.headless_fqfn, self.dialect)
     assert found == ['field_0', 'field_1', 'field_2', 'field_3']
示例#11
0
    def test_misc_c01_header_one_col(self):
        """ Column 1 of the header file must be reported as 'phone'. """
        found = mod.get_field_names(self.header_fqfn, self.dialect, 1)
        assert found == 'phone'
示例#12
0
    def test_misc_c01_header_all_cols(self):
        """ All names read from the header file must equal self.name_list. """
        found = mod.get_field_names(self.header_fqfn, self.dialect)
        assert found == self.name_list
示例#13
0
    def analyze_fields(self,
                       field_number=None,
                       field_types_overrides=None,
                       max_freq_number=None):
        """ Determines types, names, and characteristics of fields.

            Inputs:
               - field_number - if None, then analyzes all fields, otherwise
                 analyzes just the single field (based on zero-offset)
               - field_types_overrides - optional; iterated for its keys
                 and indexed by them (dict-like, keyed by field number),
                 each value replaces the detected type of that field
               - max_freq_number - optional limit handed to
                 miscer.get_field_freq; if None, a default is picked
                 depending on whether one field or all fields are analyzed
            Outputs:
               - populates public class structures (the self.field_*
                 containers plus self.variance and self.stddev), indexed
                 by field number
               - stores max_freq_number on self.max_freq_number
               - returns None
        """
        self.max_freq_number = max_freq_number

        # Single-argument print(...) prints the same text as the Python 2
        # print statement and is also valid on Python 3.
        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # optional analysis of a single field
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_names(
                self.filename, self.dialect, f_no)

            # An explicit limit wins; otherwise the default depends on
            # whether a single field or every field is being analyzed.
            if max_freq_number is None:
                if field_number is None:
                    max_items = MAX_FREQ_MULTI_COL_DEFAULT
                else:
                    max_items = MAX_FREQ_SINGLE_COL_DEFAULT
            else:
                max_items = max_freq_number

            (self.field_freqs[f_no],
             self.field_trunc[f_no],
             self.field_rows_invalid[f_no]) = miscer.get_field_freq(
                 self.filename, self.dialect, f_no, max_items)

            self.field_types[f_no] = typer.get_field_type(
                self.field_freqs[f_no])
            # Every override is re-applied on each pass, so an override for
            # f_no replaces the type detected just above before it is used
            # below (overrides for other fields are written as well).
            if field_types_overrides:
                for col_no in field_types_overrides:
                    self.field_types[col_no] = field_types_overrides[col_no]

            self.field_max[f_no] = miscer.get_max(self.field_types[f_no],
                                                  self.field_freqs[f_no])
            self.field_min[f_no] = miscer.get_min(self.field_types[f_no],
                                                  self.field_freqs[f_no])

            # case and length statistics are only computed for strings
            if self.field_types[f_no] == 'string':
                self.field_case[f_no] = miscer.get_case(
                    self.field_types[f_no], self.field_freqs[f_no])
                self.field_min_length[f_no] = miscer.get_min_length(
                    self.field_freqs[f_no])
                self.field_max_length[f_no] = miscer.get_max_length(
                    self.field_freqs[f_no])
                self.field_mean_length[f_no] = mather.get_mean_length(
                    self.field_freqs[f_no])
            else:
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None

            # mean, median, variance and stddev only for numeric types
            if self.field_types[f_no] in ('integer', 'float'):
                self.field_mean[f_no] = mather.get_mean(self.field_freqs[f_no])
                self.field_median[f_no] = mather.GetDictMedian().run(
                    self.field_freqs[f_no])
                (self.variance[f_no],
                 self.stddev[f_no]) = mather.get_variance_and_stddev(
                     self.field_freqs[f_no], self.field_mean[f_no])
            else:
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None
示例#14
0
    def analyze_fields(self,
                       field_number=None,
                       field_types_overrides=None,
                       max_freq_number=None):
        """ Determines types, names, and characteristics of fields.

            Inputs:
               - field_number - if None, then analyzes all fields, otherwise
                 analyzes just the single field (based on zero-offset)
               - field_types_overrides - optional; iterated for its keys
                 and indexed by them (dict-like, keyed by field number),
                 each value replaces the detected type of that field
               - max_freq_number - optional limit handed to
                 miscer.get_field_freq; if None, a default is picked
                 depending on whether one field or all fields are analyzed
            Outputs:
               - populates public class structures (the self.field_*
                 containers plus self.variance and self.stddev), indexed
                 by field number
               - stores max_freq_number on self.max_freq_number
               - returns None
        """
        self.max_freq_number = max_freq_number

        # Single-argument print(...) prints the same text as the Python 2
        # print statement and is also valid on Python 3.
        if self.verbose:
            print('Field Analysis Progress: ')

        for f_no in range(self.field_cnt):
            # optional analysis of a single field
            if field_number is not None and f_no != field_number:
                continue

            if self.verbose:
                print('   Analyzing field: %d' % f_no)

            self.field_names[f_no] = miscer.get_field_names(
                self.filename, self.dialect, f_no)

            # An explicit limit wins; otherwise the default depends on
            # whether a single field or every field is being analyzed.
            if max_freq_number is None:
                if field_number is None:
                    max_items = MAX_FREQ_MULTI_COL_DEFAULT
                else:
                    max_items = MAX_FREQ_SINGLE_COL_DEFAULT
            else:
                max_items = max_freq_number

            (self.field_freqs[f_no], self.field_trunc[f_no],
             self.field_rows_invalid[f_no]) = miscer.get_field_freq(
                 self.filename, self.dialect, f_no, max_items)

            self.field_types[f_no] = typer.get_field_type(
                self.field_freqs[f_no])
            # Every override is re-applied on each pass, so an override for
            # f_no replaces the type detected just above before it is used
            # below (overrides for other fields are written as well).
            if field_types_overrides:
                for col_no in field_types_overrides:
                    self.field_types[col_no] = field_types_overrides[col_no]

            self.field_max[f_no] = miscer.get_max(self.field_types[f_no],
                                                  self.field_freqs[f_no])
            self.field_min[f_no] = miscer.get_min(self.field_types[f_no],
                                                  self.field_freqs[f_no])

            # case and length statistics are only computed for strings
            if self.field_types[f_no] == 'string':
                self.field_case[f_no] = miscer.get_case(
                    self.field_types[f_no], self.field_freqs[f_no])
                self.field_min_length[f_no] = miscer.get_min_length(
                    self.field_freqs[f_no])
                self.field_max_length[f_no] = miscer.get_max_length(
                    self.field_freqs[f_no])
                self.field_mean_length[f_no] = mather.get_mean_length(
                    self.field_freqs[f_no])
            else:
                self.field_case[f_no] = None
                self.field_min_length[f_no] = None
                self.field_max_length[f_no] = None
                self.field_mean_length[f_no] = None

            # mean, median, variance and stddev only for numeric types
            if self.field_types[f_no] in ('integer', 'float'):
                self.field_mean[f_no] = mather.get_mean(self.field_freqs[f_no])
                self.field_median[f_no] = mather.GetDictMedian().run(
                    self.field_freqs[f_no])
                (self.variance[f_no],
                 self.stddev[f_no]) = mather.get_variance_and_stddev(
                     self.field_freqs[f_no], self.field_mean[f_no])
            else:
                self.field_mean[f_no] = None
                self.field_median[f_no] = None
                self.variance[f_no] = None
                self.stddev[f_no] = None