def test_bf_03_single_col(self):
     """Check build_freq output when only one column is requested.

     Sets ``self.columns`` to ``[1]`` and calls ``mod.build_freq`` with
     sampling method 'non' and a sampling rate of None.  The result must
     not be flagged as truncated, its counts must total 1000, it must hold
     4 entries, and the first element of every key must be one of A1-A4.
     """
     allowed_first = ('A1', 'A2', 'A3', 'A4')
     self.columns = [1]

     freq, was_truncated = mod.build_freq(
         self.files, self.dialect, self.columns, self.number, 'non', None)

     assert not was_truncated
     assert sum(freq.values()) == 1000
     # In principle fewer distinct entries could show up, but that is
     # extremely unlikely.
     assert len(freq) == 4
     for combo in freq:
         assert combo[0] in allowed_first
 def test_bf_01_multicol(self):
     """Check build_freq output for the fixture's default column selection.

     Calls ``mod.build_freq`` with sampling method 'non' and a sampling
     rate of None, using ``self.columns`` and ``self.number`` as already
     set on the instance.  The result must not be flagged as truncated,
     its counts must total 1000, it must hold 8 entries, and every key
     must pair one of A1-A4 with one of B1-B2.
     """
     allowed_first = ('A1', 'A2', 'A3', 'A4')
     allowed_second = ('B1', 'B2')

     freq, was_truncated = mod.build_freq(
         self.files, self.dialect, self.columns, self.number, 'non', None)

     assert not was_truncated
     assert sum(freq.values()) == 1000
     assert len(freq) == 8
     for combo in freq:
         assert combo[0] in allowed_first
         assert combo[1] in allowed_second
 def test_bf_02_multicol_and_truncation(self):
     """Check that build_freq reports truncation when ``self.number`` is 4.

     Sets ``self.number`` to 4 (presumably the cap on retained entries --
     confirm against ``mod.build_freq``) and calls ``mod.build_freq`` with
     sampling method 'non' and a sampling rate of None.  The result must
     be flagged as truncated, hold 4 entries, and every key must pair one
     of A1-A4 with one of B1-B2.
     """
     allowed_first = ('A1', 'A2', 'A3', 'A4')
     allowed_second = ('B1', 'B2')
     self.number = 4

     freq, was_truncated = mod.build_freq(
         self.files, self.dialect, self.columns, self.number, 'non', None)

     assert was_truncated
     # In principle fewer distinct entries could show up, but that is
     # extremely unlikely.
     assert len(freq) == 4
     for combo in freq:
         assert combo[0] in allowed_first
         assert combo[1] in allowed_second
 def test_bf_03_interval_sampling(self):
     """Check build_freq output under 'interval' sampling with a rate of 10.

     Calls ``mod.build_freq`` with sampling method 'interval' and sampling
     rate 10, using ``self.columns`` and ``self.number`` as already set on
     the instance.  The result must not be flagged as truncated, its
     counts must total 100, it must hold 8 entries, and every key must
     pair one of A1-A4 with one of B1-B2.
     """
     allowed_first = ('A1', 'A2', 'A3', 'A4')
     allowed_second = ('B1', 'B2')

     freq, was_truncated = mod.build_freq(
         self.files, self.dialect, self.columns, self.number, 'interval', 10)

     assert not was_truncated
     assert sum(freq.values()) == 100
     # In principle fewer distinct entries could show up, but that is
     # unlikely.
     assert len(freq) == 8
     for combo in freq:
         assert combo[0] in allowed_first
         assert combo[1] in allowed_second