def test_misc_c03_empty(self):
    """get_field_names returns None for the empty_fqfn fixture, both
    before and after has_header is switched off on the dialect.
    """
    def assert_no_field_names():
        # Checks the all-columns call first, then the single-column call
        # for column 1; both are expected to be None.
        assert mod.get_field_names(self.empty_fqfn, self.dialect) is None
        assert mod.get_field_names(self.empty_fqfn, self.dialect, 1) is None

    # test with header:
    assert_no_field_names()

    # test without header
    self.dialect.has_header = False
    assert_no_field_names()
def test_misc_c03_empty(self):
    """get_field_names must return None for the empty_fqfn fixture.

    Checked for the all-columns call and for column 1, first with the
    dialect as the fixture provides it and then with has_header forced
    to False.
    """
    # Identity comparison: `== None` can be satisfied by any object with a
    # permissive __eq__, whereas the contract under test is "returns None".

    # test with header:
    assert mod.get_field_names(self.empty_fqfn, self.dialect) is None
    assert mod.get_field_names(self.empty_fqfn, self.dialect, 1) is None

    # test without header
    self.dialect.has_header = False
    assert mod.get_field_names(self.empty_fqfn, self.dialect) is None
    assert mod.get_field_names(self.empty_fqfn, self.dialect, 1) is None
def test_misc_c04_noquote(self):
    """For the noquote_fqfn fixture, get_field_names returns name_list
    for all columns and 'phone' for column 1.
    """
    all_names = mod.get_field_names(self.noquote_fqfn, self.dialect)
    assert all_names == self.name_list

    column_one = mod.get_field_names(self.noquote_fqfn, self.dialect, 1)
    assert column_one == 'phone'
def test_misc_c02_headless_one_col(self):
    """With has_header set to False, column 1 of the headless_fqfn
    fixture is reported as 'field_1'.
    """
    self.dialect.has_header = False
    column_one = mod.get_field_names(self.headless_fqfn, self.dialect, 1)
    assert column_one == 'field_1'
def test_misc_c02_headless_all_col(self):
    """With has_header set to False, the headless_fqfn fixture yields
    the four names 'field_0' through 'field_3'.
    """
    self.dialect.has_header = False
    all_names = mod.get_field_names(self.headless_fqfn, self.dialect)
    assert all_names == ['field_0', 'field_1', 'field_2', 'field_3']
def test_misc_c01_header_one_col(self):
    """Column 1 of the header_fqfn fixture is named 'phone'."""
    column_one = mod.get_field_names(self.header_fqfn, self.dialect, 1)
    assert column_one == 'phone'
def test_misc_c01_header_all_cols(self):
    """All field names of the header_fqfn fixture equal name_list."""
    all_names = mod.get_field_names(self.header_fqfn, self.dialect)
    assert all_names == self.name_list
def test_misc_c04_noquote(self):
    """Checks get_field_names against the noquote_fqfn fixture: the
    all-columns result must equal name_list, and column 1 must be 'phone'.
    """
    # Whole-file lookup first, then the single-column lookup.
    names_for_all_columns = mod.get_field_names(self.noquote_fqfn,
                                                self.dialect)
    assert names_for_all_columns == self.name_list

    name_for_column_one = mod.get_field_names(self.noquote_fqfn,
                                              self.dialect,
                                              1)
    assert name_for_column_one == 'phone'
def test_misc_c02_headless_one_col(self):
    """Checks the name returned for column 1 of headless_fqfn once the
    dialect has has_header turned off: it must be 'field_1'.
    """
    self.dialect.has_header = False

    result = mod.get_field_names(self.headless_fqfn,
                                 self.dialect,
                                 1)
    assert result == 'field_1'
def test_misc_c02_headless_all_col(self):
    """Checks the full name list returned for headless_fqfn once the
    dialect has has_header turned off.
    """
    self.dialect.has_header = False

    result = mod.get_field_names(self.headless_fqfn, self.dialect)
    assert result == [
        'field_0',
        'field_1',
        'field_2',
        'field_3',
    ]
def test_misc_c01_header_one_col(self):
    """Checks that get_field_names reports 'phone' for column 1 of the
    header_fqfn fixture.
    """
    result = mod.get_field_names(self.header_fqfn,
                                 self.dialect,
                                 1)
    assert result == 'phone'
def test_misc_c01_header_all_cols(self):
    """Checks that get_field_names, called without a column number on
    the header_fqfn fixture, returns a value equal to name_list.
    """
    result = mod.get_field_names(self.header_fqfn,
                                 self.dialect)
    assert result == self.name_list
def analyze_fields(self, field_number=None, field_types_overrides=None,
                   max_freq_number=None):
    """ Determines types, names, and characteristics of fields.

        Inputs:
           - field_number - if None, then analyzes all fields, otherwise
             analyzes just the single field (based on zero-offset)
           - field_types_overrides - optional container indexed by column
             number whose values replace the detected field types
           - max_freq_number - if given, passed to miscer.get_field_freq
             as its max-items argument; if None, a module default is used
             (MAX_FREQ_MULTI_COL_DEFAULT when analyzing all fields,
             MAX_FREQ_SINGLE_COL_DEFAULT when analyzing a single field)
        Outputs:
           - populates public class structures
    """
    self.max_freq_number = max_freq_number

    # The max-items choice depends only on the arguments, not on the field
    # being analyzed, so it is made once instead of on every iteration.
    if max_freq_number is not None:
        max_items = max_freq_number
    elif field_number is None:
        max_items = MAX_FREQ_MULTI_COL_DEFAULT
    else:
        max_items = MAX_FREQ_SINGLE_COL_DEFAULT

    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    if self.verbose:
        print('Field Analysis Progress: ')

    for f_no in range(self.field_cnt):
        # optional analysis of a single field
        if field_number is not None and f_no != field_number:
            continue

        if self.verbose:
            print('   Analyzing field: %d' % f_no)

        self.field_names[f_no] = miscer.get_field_names(self.filename,
                                                        self.dialect,
                                                        f_no)

        (self.field_freqs[f_no],
         self.field_trunc[f_no],
         self.field_rows_invalid[f_no]) = miscer.get_field_freq(
             self.filename, self.dialect, f_no, max_items)

        self.field_types[f_no] = typer.get_field_type(self.field_freqs[f_no])

        # Overrides are re-applied after each detection because the line
        # above has just overwritten this field's entry.  Note that every
        # override is written, including columns not analyzed in this call.
        if field_types_overrides:
            for col_no in field_types_overrides:
                self.field_types[col_no] = field_types_overrides[col_no]

        self.field_max[f_no] = miscer.get_max(self.field_types[f_no],
                                              self.field_freqs[f_no])
        self.field_min[f_no] = miscer.get_min(self.field_types[f_no],
                                              self.field_freqs[f_no])

        # Case and length statistics are only computed for string fields.
        if self.field_types[f_no] == 'string':
            self.field_case[f_no] = miscer.get_case(self.field_types[f_no],
                                                    self.field_freqs[f_no])
            self.field_min_length[f_no] = miscer.get_min_length(
                self.field_freqs[f_no])
            self.field_max_length[f_no] = miscer.get_max_length(
                self.field_freqs[f_no])
            self.field_mean_length[f_no] = mather.get_mean_length(
                self.field_freqs[f_no])
        else:
            self.field_case[f_no] = None
            self.field_min_length[f_no] = None
            self.field_max_length[f_no] = None
            self.field_mean_length[f_no] = None

        # Mean, median, variance and stddev are only computed for
        # numeric fields.
        if self.field_types[f_no] in ('integer', 'float'):
            self.field_mean[f_no] = mather.get_mean(self.field_freqs[f_no])
            self.field_median[f_no] = mather.GetDictMedian().run(
                self.field_freqs[f_no])
            (self.variance[f_no], self.stddev[f_no]) = \
                mather.get_variance_and_stddev(self.field_freqs[f_no],
                                               self.field_mean[f_no])
        else:
            self.field_mean[f_no] = None
            self.field_median[f_no] = None
            self.variance[f_no] = None
            self.stddev[f_no] = None
def analyze_fields(self, field_number=None, field_types_overrides=None,
                   max_freq_number=None):
    """ Determines types, names, and characteristics of fields.

        Inputs:
           - field_number - if None, then analyzes all fields, otherwise
             analyzes just the single field (based on zero-offset)
           - field_types_overrides - optional container indexed by column
             number; each value replaces the detected type of that column
           - max_freq_number - when not None, handed to
             miscer.get_field_freq as its max-items argument; when None,
             MAX_FREQ_MULTI_COL_DEFAULT is used for an all-fields run and
             MAX_FREQ_SINGLE_COL_DEFAULT for a single-field run
        Outputs:
           - populates public class structures
    """
    self.max_freq_number = max_freq_number

    # Loop-invariant: decided once from the arguments rather than being
    # recomputed for every field.
    if max_freq_number is None:
        if field_number is None:
            max_items = MAX_FREQ_MULTI_COL_DEFAULT
        else:
            max_items = MAX_FREQ_SINGLE_COL_DEFAULT
    else:
        max_items = max_freq_number

    # print(...) with one argument behaves the same as the Python 2 print
    # statement and also parses under Python 3.
    if self.verbose:
        print('Field Analysis Progress: ')

    for f_no in range(self.field_cnt):
        if field_number is not None:
            # optional analysis of a single field
            if f_no != field_number:
                continue

        if self.verbose:
            print('   Analyzing field: %d' % f_no)

        self.field_names[f_no] = miscer.get_field_names(
            self.filename, self.dialect, f_no)

        (self.field_freqs[f_no],
         self.field_trunc[f_no],
         self.field_rows_invalid[f_no]) = miscer.get_field_freq(
             self.filename, self.dialect, f_no, max_items)

        self.field_types[f_no] = typer.get_field_type(
            self.field_freqs[f_no])

        # The detection above overwrites this field's entry, so overrides
        # are written again here.  All overrides are written each time,
        # which also sets entries for columns outside this run.
        if field_types_overrides:
            for col_no in field_types_overrides:
                self.field_types[col_no] = field_types_overrides[col_no]

        self.field_max[f_no] = miscer.get_max(self.field_types[f_no],
                                              self.field_freqs[f_no])
        self.field_min[f_no] = miscer.get_min(self.field_types[f_no],
                                              self.field_freqs[f_no])

        # String-only characteristics; cleared to None for other types.
        if self.field_types[f_no] == 'string':
            self.field_case[f_no] = miscer.get_case(
                self.field_types[f_no], self.field_freqs[f_no])
            self.field_min_length[f_no] = miscer.get_min_length(
                self.field_freqs[f_no])
            self.field_max_length[f_no] = miscer.get_max_length(
                self.field_freqs[f_no])
            self.field_mean_length[f_no] = mather.get_mean_length(
                self.field_freqs[f_no])
        else:
            self.field_case[f_no] = None
            self.field_min_length[f_no] = None
            self.field_max_length[f_no] = None
            self.field_mean_length[f_no] = None

        # Numeric-only characteristics; cleared to None for other types.
        if self.field_types[f_no] in ('integer', 'float'):
            self.field_mean[f_no] = mather.get_mean(self.field_freqs[f_no])
            self.field_median[f_no] = mather.GetDictMedian().run(
                self.field_freqs[f_no])
            (self.variance[f_no], self.stddev[f_no]) \
                = mather.get_variance_and_stddev(self.field_freqs[f_no],
                                                 self.field_mean[f_no])
        else:
            self.field_mean[f_no] = None
            self.field_median[f_no] = None
            self.variance[f_no] = None
            self.stddev[f_no] = None