def test_column_stats(self):
    """Check the 'regression' labels reported by ColumnStatsAnalyser.

    Runs ComplexTypeAnalyser followed by ColumnStatsAnalyser over a small
    inline CSV table and checks the 'regression' entry of the first three
    columns.
    """
    # c1 rises by exactly 1 per row, c2 rises but has an empty cell,
    # c3 goes up and down, c4 is empty in every row.
    data = (
        "c1,c2,c3,c4\n"
        "1,0.1,1,\n"
        "2,5.1,2,\n"
        "3,5.2,1,\n"
        "4,5.6,3,\n"
        "5,,4,\n"
        "6,19,6,"
    )
    data_tables = tablemagician.from_file_object(StringIO.StringIO(data))
    source_table = data_tables[0]
    try:
        # Kept on self, as before, in case other fixture code reads it.
        self.analyser_table = source_table.process()
    finally:
        # Close the table even when process() raises.
        source_table.close()

    # Analysers run in list order: type analysis first, then column stats.
    engine = AnalyserEngine([ComplexTypeAnalyser(), ColumnStatsAnalyser()])
    engine.process(self.analyser_table)

    stats = self.analyser_table.analysers[ColumnStatsAnalyser.name]
    self.assertEqual(stats[0]['regression'], 'INCREASE/LINEAR/1.0')
    self.assertEqual(stats[1]['regression'], 'INCREASE/MONOTONIC')
    # assertNotIn shows the offending container when the check fails.
    self.assertNotIn('regression', stats[2])
def test_structure_analyser(self):
    """Smoke-test StructureAnalyser on a CSV file from the test data.

    The test makes no assertions; it passes as long as loading the table
    and running the engine do not raise.
    """
    # NOTE: relative path, so it depends on the directory the tests are
    # started from.
    data_tables = tablemagician.from_path('../parser/testdata/nuts/101.csv')
    source_table = data_tables[0]
    try:
        analyser_table = source_table.process(max_lines=100)
    finally:
        # Close the table even when process() raises.
        source_table.close()

    engine = AnalyserEngine([StructureAnalyser()])
    engine.process(analyser_table)
def test_something(self):
    """Run two test analysers and expect two entries in the results.

    Feeds a table built from a CSV test file through an engine holding
    TestAnalyser and AnotherTestAnalyser, then checks that the table's
    ``analysers`` collection has length 2.
    """
    # NOTE: relative path, so it depends on the directory the tests are
    # started from.
    data_tables = tablemagician.from_path('../parser/testdata/39.csv')
    source_table = data_tables[0]
    try:
        analyser_table = source_table.process(max_lines=100)
    finally:
        # Close the table even when process() raises.
        source_table.close()

    engine = AnalyserEngine([TestAnalyser(), AnotherTestAnalyser()])
    engine.process(analyser_table)

    self.assertEqual(len(analyser_table.analysers), 2)
def test_type_detection(self):
    """Check the type patterns ComplexTypeAnalyser reports per column.

    Runs ComplexTypeAnalyser followed by ColumnStatsAnalyser over an
    inline CSV table holding lengths, percentages and currency amounts,
    then checks the pattern labels and selected counts for each column.
    """
    data = (
        "c1,c2,c3,c4\n"
        "12cm,3%,€300.50,\n"
        "1 cm,1%,€ 12.345,\n"
        "1.5 cm,0.5%,€ 130,34.2\n"
        "1 cm,1%,€ 12.345,\n"
        "1.5 cm,0.5%,€ 130,34.2\n"
        "1.5 cm,0.5%,€ 130.1000,"
    )
    data_tables = tablemagician.from_file_object(StringIO.StringIO(data))
    source_table = data_tables[0]
    try:
        # Kept on self, as before, in case other fixture code reads it.
        self.analyser_table = source_table.process()
    finally:
        # Close the table even when process() raises.
        source_table.close()

    # Analysers run in list order: type analysis first, then column stats.
    engine = AnalyserEngine([ComplexTypeAnalyser(), ColumnStatsAnalyser()])
    engine.process(self.analyser_table)

    columns = self.analyser_table.analysers[ComplexTypeAnalyser.name]

    # assertEqual (instead of assertTrue(a == b)) reports both values on
    # failure; the label is passed as the message for the prefix checks.
    for label in columns[0]:
        self.assertTrue(label.startswith('NUMALPHA'), label)
    self.assertEqual(columns[0]['NUMALPHA/NUMBER/INT:1+-ALPHA:2'], 2)

    for label in columns[1]:
        self.assertTrue(label.startswith('NUMALPHA'), label)
    self.assertEqual(columns[1]['NUMALPHA/NUMBER/FLOAT:1.1-ALPHA+:1'], 3)

    for label in columns[2]:
        self.assertTrue(label.startswith('ALPHANUM'), label)
    self.assertEqual(columns[2]['ALPHANUM/ALPHA+:1-NUMBER/FLOAT:2.3'], 2)
    self.assertEqual(columns[2]['ALPHANUM/ALPHA+:1-NUMBER/FLOAT:3.*'], 1)

    self.assertEqual(columns[3]['EMPTY'], 4)

    # Diagnostic output only. A single pre-formatted string prints the
    # same line under the Python 2 print statement and under print().
    for stats in self.analyser_table.analysers[ColumnStatsAnalyser.name]:
        print('ColStats: %s' % (stats,))
import traceback from analyser import AnalyserEngine from column_stats_analyser import ColumnStatsAnalyser from complex_type_analyser import ComplexTypeAnalyser __author__ = 'sebastian' import tablemagician #url = 'http://data.wu.ac.at/dataset/3e4e505f-85cd-4f4c-af43-b547b51fc287/resource/9c2f7b09-f2da-447c-83cd-ea1df37d8e4f/download/allcourses15s.csv' rootdir = 'testdata/nuts' # Build analysers comp = ComplexTypeAnalyser() col = ColumnStatsAnalyser() engine = AnalyserEngine([comp, col]) data = [] # Load a path: for subdir, dirs, files in os.walk(rootdir): for file in files: print file try: datatables = tablemagician.from_path(os.path.join(rootdir, file)) for t in datatables: analyser_table = t.process(max_lines=50) engine.process(analyser_table) stats = analyser_table.analysers[ColumnStatsAnalyser.name] data.append({'name': file, 'header': analyser_table.headers, 'stats': stats})