示例#1
0
    def test_column_stats(self):
        """Check the 'regression' entry ColumnStatsAnalyser reports per column.

        Runs ComplexTypeAnalyser followed by ColumnStatsAnalyser over a small
        in-memory CSV and asserts on the stats of the first three columns.
        """
        # c1 rises in steps of one, c2 rises but has one empty cell,
        # c3 goes up and down, c4 is empty in every row.
        data = "c1,c2,c3,c4\n" \
               "1,0.1,1,\n" \
               "2,5.1,2,\n" \
               "3,5.2,1,\n" \
               "4,5.6,3,\n" \
               "5,,4,\n" \
               "6,19,6,"

        data_tables = tablemagician.from_file_object(StringIO.StringIO(data))
        table = data_tables[0]
        try:
            self.analyser_table = table.process()
        finally:
            # Release the data table even when process() raises.
            table.close()

        # The type analyser is placed ahead of the stats analyser in the chain.
        engine = AnalyserEngine([ComplexTypeAnalyser(), ColumnStatsAnalyser()])
        engine.process(self.analyser_table)

        stats = self.analyser_table.analysers[ColumnStatsAnalyser.name]
        self.assertEqual(stats[0]['regression'], 'INCREASE/LINEAR/1.0')
        self.assertEqual(stats[1]['regression'], 'INCREASE/MONOTONIC')
        # assertNotIn reports the offending container when the check fails.
        self.assertNotIn('regression', stats[2])
示例#2
0
    def test_structure_analyser(self):
        """Smoke-test StructureAnalyser on the first 100 lines of a CSV file.

        There are no assertions: the test only fails when loading, processing
        or analysing raises.
        """
        # NOTE(review): the path is relative, so the result depends on the
        # working directory the tests are started from -- confirm.
        data_tables = tablemagician.from_path('../parser/testdata/nuts/101.csv')
        table = data_tables[0]
        try:
            analyser_table = table.process(max_lines=100)
        finally:
            # Release the data table even when process() raises.
            table.close()

        # Single-analyser chain, fed with the processed table.
        engine = AnalyserEngine([StructureAnalyser()])
        engine.process(analyser_table)
示例#3
0
    def test_something(self):
        """Check that a two-analyser chain leaves two entries on the table.

        Processes the first 100 lines of a CSV file, runs TestAnalyser and
        AnotherTestAnalyser through one engine, and asserts that
        ``analyser_table.analysers`` then has length 2.
        """
        # NOTE(review): the path is relative, so the result depends on the
        # working directory the tests are started from -- confirm.
        data_tables = tablemagician.from_path('../parser/testdata/39.csv')
        table = data_tables[0]
        try:
            analyser_table = table.process(max_lines=100)
        finally:
            # Release the data table even when process() raises.
            table.close()

        engine = AnalyserEngine([TestAnalyser(), AnotherTestAnalyser()])
        engine.process(analyser_table)

        self.assertEqual(len(analyser_table.analysers), 2)
示例#4
0
    def test_type_detection(self):
        """Check the type labels ComplexTypeAnalyser records for each column.

        Runs ComplexTypeAnalyser followed by ColumnStatsAnalyser over a small
        in-memory CSV, asserts on the label prefixes and selected label counts
        of the four columns, then prints the column stats for inspection.
        """
        # c1 holds lengths with a unit suffix, c2 percentages, c3 amounts with
        # a currency prefix, c4 is empty in four of the six rows.
        data = "c1,c2,c3,c4\n" \
               "12cm,3%,€300.50,\n" \
               "1 cm,1%,€ 12.345,\n" \
               "1.5 cm,0.5%,€ 130,34.2\n" \
               "1 cm,1%,€ 12.345,\n" \
               "1.5 cm,0.5%,€ 130,34.2\n" \
               "1.5 cm,0.5%,€ 130.1000,"

        data_tables = tablemagician.from_file_object(StringIO.StringIO(data))
        table = data_tables[0]
        try:
            self.analyser_table = table.process()
        finally:
            # Release the data table even when process() raises.
            table.close()

        # The type analyser is placed ahead of the stats analyser in the chain.
        engine = AnalyserEngine([ComplexTypeAnalyser(), ColumnStatsAnalyser()])
        engine.process(self.analyser_table)

        columns = self.analyser_table.analysers[ComplexTypeAnalyser.name]

        # Passing the label as the message shows which one broke the check.
        for label in columns[0]:
            self.assertTrue(label.startswith('NUMALPHA'), label)

        # assertEqual (rather than assertTrue(a == b)) reports the actual
        # count when the expectation is not met.
        self.assertEqual(columns[0]['NUMALPHA/NUMBER/INT:1+-ALPHA:2'], 2)

        for label in columns[1]:
            self.assertTrue(label.startswith('NUMALPHA'), label)

        self.assertEqual(columns[1]['NUMALPHA/NUMBER/FLOAT:1.1-ALPHA+:1'], 3)

        for label in columns[2]:
            self.assertTrue(label.startswith('ALPHANUM'), label)

        self.assertEqual(columns[2]['ALPHANUM/ALPHA+:1-NUMBER/FLOAT:2.3'], 2)
        self.assertEqual(columns[2]['ALPHANUM/ALPHA+:1-NUMBER/FLOAT:3.*'], 1)
        self.assertEqual(columns[3]['EMPTY'], 4)

        for stats in self.analyser_table.analysers[ColumnStatsAnalyser.name]:
            # Single pre-formatted argument: prints the same text as the
            # Python 2 print statement and also parses under Python 3.
            print('ColStats: %s' % (stats,))
示例#5
0
import os
import traceback

from analyser import AnalyserEngine
from column_stats_analyser import ColumnStatsAnalyser
from complex_type_analyser import ComplexTypeAnalyser

__author__ = 'sebastian'

import tablemagician

#url = 'http://data.wu.ac.at/dataset/3e4e505f-85cd-4f4c-af43-b547b51fc287/resource/9c2f7b09-f2da-447c-83cd-ea1df37d8e4f/download/allcourses15s.csv'
rootdir = 'testdata/nuts'

# Build analysers
comp = ComplexTypeAnalyser()
col = ColumnStatsAnalyser()
engine = AnalyserEngine([comp, col])

data = []

# Load a path:
for subdir, dirs, files in os.walk(rootdir):
    for file in files:
        print file
        try:
            datatables = tablemagician.from_path(os.path.join(rootdir, file))

            for t in datatables:
                analyser_table = t.process(max_lines=50)
                engine.process(analyser_table)
                stats = analyser_table.analysers[ColumnStatsAnalyser.name]
                data.append({'name': file, 'header': analyser_table.headers, 'stats': stats})