示例#1
0
    def test_from_csv_builtin(self):
        # Read the fixture with the standard-library csv module swapped into
        # agate.table in place of the reader that module currently holds.
        import csv
        from agate import table as table_module

        # Register the restore before patching so the swap cannot leak into
        # other tests, whatever the outcome of this one.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csv

        if six.PY2:
            # Under Python 2 this test expects the built-in reader to fail
            # with UnicodeDecodeError; the return value is irrelevant.
            with self.assertRaises(UnicodeDecodeError):
                Table.from_csv('examples/test.csv', self.columns)
        else:
            output = Table.from_csv('examples/test.csv', self.columns)

            self.assertEqual(len(output.columns), 3)
示例#2
0
    def test_from_csv_builtin(self):
        # Read the fixture with the standard-library csv module swapped into
        # agate.table in place of the reader that module currently holds.
        import csv
        from agate import table as table_module

        # Put the module's original reader back when the test ends so the
        # patch does not affect tests that run afterwards.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csv

        if six.PY2:
            # Under Python 2 this test expects the built-in reader to fail
            # with UnicodeDecodeError; the return value is irrelevant.
            with self.assertRaises(UnicodeDecodeError):
                Table.from_csv('examples/test.csv', self.columns)
        else:
            # Distinct name: the module alias above is no longer rebound to a
            # Table instance.
            loaded = Table.from_csv('examples/test.csv', self.columns)

            self.assertEqual(len(loaded.columns), 3)
示例#3
0
    def test_from_csv_file_like_object(self):
        # A table built from an open file handle must match the table built
        # from the same file given by path.
        from_path = Table.from_csv('examples/test.csv', self.columns)

        with open('examples/test.csv') as handle:
            from_handle = Table.from_csv(handle, self.columns)

            # Column metadata first ...
            self.assertSequenceEqual(from_path.column_names, from_handle.column_names)
            self.assertSequenceEqual(from_path.column_types, from_handle.column_types)

            # ... then overall dimensions ...
            self.assertEqual(len(from_path.columns), len(from_handle.columns))
            self.assertEqual(len(from_path.rows), len(from_handle.rows))

            # ... and finally the first three rows, one by one.
            for index in (0, 1, 2):
                self.assertSequenceEqual(from_path.rows[index], from_handle.rows[index])
示例#4
0
    def test_from_csv_file_like_object(self):
        # Loading by path and loading from an already-open handle, with the
        # same explicit names and types, must produce matching tables.
        by_path = Table.from_csv('examples/test.csv', self.column_names, self.column_types)

        with open('examples/test.csv') as stream:
            by_stream = Table.from_csv(stream, self.column_names, self.column_types)

            self.assertSequenceEqual(by_path.column_names, by_stream.column_names)
            self.assertSequenceEqual(by_path.column_types, by_stream.column_types)

            self.assertEqual(len(by_path.columns), len(by_stream.columns))
            self.assertEqual(len(by_path.rows), len(by_stream.rows))

            # Compare the first three rows pairwise.
            for position in range(3):
                self.assertSequenceEqual(by_path.rows[position], by_stream.rows[position])
示例#5
0
    def test_from_csv_type_tester(self):
        # The tester forces the 'number' column to Text, so the expected type
        # list starts with Text.
        forced = TypeTester(force={'number': Text()})
        loaded = Table.from_csv('examples/test.csv', column_types=forced)

        expected_types = [Text, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
示例#6
0
    def test_from_csv_default_type_tester(self):
        # With no column specification the test expects three columns typed
        # Number, Number, Text.
        loaded = Table.from_csv('examples/test.csv')

        self.assertEqual(len(loaded.columns), 3)
        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
示例#7
0
    def test_from_csv_default_type_tester(self):
        # No names or types are passed; the loaded table is expected to have
        # three columns whose data types are Number, Number and Text.
        result = Table.from_csv('examples/test.csv')
        expected_kinds = (Number, Number, Text)

        self.assertEqual(len(result.columns), 3)
        for index, kind in enumerate(expected_kinds):
            self.assertIsInstance(result.columns[index].data_type, kind)
示例#8
0
 def _get_promise_table(self):
     """Load the voting advice application answers into ``self.promise_table``.

     Reads ``yle-vaalikone-2015.csv``, keeps the rows whose ``valittu``
     value equals 1, selects the ``nimi`` and ``puolue`` columns plus the
     column named by ``self.question``, and stores the result with that
     question column renamed to ``promise``.
     """
     # Parenthesised call form: valid on Python 3 and prints the same single
     # string on Python 2.
     print("Get election machine data (YLE)")
     table = Table.from_csv("yle-vaalikone-2015.csv")\
         .where(lambda row: row["valittu"] == 1)\
         .select(["nimi", "puolue", self.question])
     self.promise_table = rename_column(table, self.question, "promise")
示例#9
0
    def test_from_csv_no_header_columns(self):
        # A header-less file loaded with explicit column names should carry
        # those names and the expected types.
        loaded = Table.from_csv('examples/test_no_header.csv', self.column_names, header=False)

        self.assertColumnNames(loaded, self.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
示例#10
0
    def test_from_csv_no_header_type_inference(self):
        # Header-less file, nothing specified: the test expects columns named
        # A, B, C with types Number, Number, Text.
        loaded = Table.from_csv('examples/test_no_header.csv', header=False)

        self.assertEqual(len(loaded.columns), 3)
        self.assertSequenceEqual(loaded.column_names, ('A', 'B', 'C'))

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
示例#11
0
    def test_from_csv_skip_lines(self):
        # Reference table: every fixture row except the first, with no
        # explicit column names.
        expected = Table(self.rows[1:], column_types=self.column_types)
        # Table under test: header=False with the first two lines skipped.
        parsed = Table.from_csv('examples/test.csv', header=False, skip_lines=2)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(parsed, expected.column_names)
        self.assertColumnTypes(parsed, expected_types)
        self.assertRows(parsed, expected.rows)
    def test_from_fixed(self):
        # The fixed-width fixture read through its schema must match the
        # pre-converted CSV version in names, type classes and rows.
        reference = Table.from_csv('examples/testfixed_converted.csv')
        parsed = Table.from_fixed('examples/testfixed', 'examples/testfixed_schema.csv')

        self.assertColumnNames(parsed, reference.column_names)

        # Compare by class of each type instance.
        reference_type_classes = list(map(type, reference.column_types))
        self.assertColumnTypes(parsed, reference_type_classes)

        self.assertRows(parsed, reference.rows)
示例#13
0
    def test_from_csv_type_tester(self):
        # 'number' is forced to Text through the tester; the remaining
        # expected types are Text, Boolean, Date, DateTime, TimeDelta.
        overrides = {'number': Text()}
        loaded = Table.from_csv('examples/test.csv', column_types=TypeTester(force=overrides))

        self.assertColumnTypes(
            loaded,
            [Text, Text, Boolean, Date, DateTime, TimeDelta],
        )
示例#14
0
    def test_from_csv_sniff_limit(self):
        # With sniff_limit=None the sniff fixture is expected to load into
        # the same names, types and rows as the in-memory reference table.
        expected = Table(self.rows, self.column_names, self.column_types)
        sniffed = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=None)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(sniffed, expected.column_names)
        self.assertColumnTypes(sniffed, expected_types)
        self.assertRows(sniffed, expected.rows)
示例#15
0
    def test_from_csv_no_header_type_inference(self):
        # Header-less file with None passed as the second positional argument:
        # the test expects columns A, B, C typed Number, Number, Text.
        loaded = Table.from_csv('examples/test_no_header.csv', None, header=False)

        self.assertEqual(len(loaded.columns), 3)
        self.assertSequenceEqual(loaded.column_names, ('A', 'B', 'C'))

        for index, kind in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[index].data_type, kind)
示例#16
0
    def test_from_csv_sniff_limit(self):
        # Load the sniff fixture with sniff_limit=None and compare it with a
        # table built directly from the fixture rows.
        reference = Table(self.rows, self.column_names, self.column_types)
        loaded = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=None)

        self.assertColumnNames(loaded, reference.column_names)
        self.assertColumnTypes(
            loaded,
            [Number, Text, Boolean, Date, DateTime, TimeDelta],
        )
        self.assertRows(loaded, reference.rows)
示例#17
0
    def test_from_fixed(self):
        # Reading the fixed-width file with its schema should reproduce the
        # table stored in the already-converted CSV.
        from_csv = Table.from_csv("examples/testfixed_converted.csv")
        from_fixed = Table.from_fixed("examples/testfixed", "examples/testfixed_schema.csv")

        # Type instances are compared by their class.
        type_classes = list(map(type, from_csv.column_types))

        self.assertColumnNames(from_fixed, from_csv.column_names)
        self.assertColumnTypes(from_fixed, type_classes)
        self.assertRows(from_fixed, from_csv.rows)
示例#18
0
    def test_from_csv_skip_lines_sequence(self):
        # Reference table holds only the second fixture row; the file is read
        # with skip_lines given as the sequence (1, 3).
        expected = Table(
            [self.rows[1]],
            column_names=self.column_names,
            column_types=self.column_types,
        )
        parsed = Table.from_csv('examples/test.csv', skip_lines=(1, 3))

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(parsed, expected.column_names)
        self.assertColumnTypes(parsed, expected_types)
        self.assertRows(parsed, expected.rows)
示例#19
0
    def test_from_csv_file_like_object(self):
        # With csvkit swapped in as agate.table's csv reader, loading from a
        # path and from an open handle must give matching tables.
        import csvkit
        from agate import table as table_module

        # Put the module's original reader back when the test ends so the
        # patch does not affect tests that run afterwards.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csvkit

        table1 = Table.from_csv('examples/test.csv', self.columns)
        with open('examples/test.csv') as fh:
            table2 = Table.from_csv(fh, self.columns)

            self.assertSequenceEqual(table1.get_column_names(), table2.get_column_names())
            self.assertSequenceEqual(table1.get_column_types(), table2.get_column_types())

            self.assertEqual(len(table1.columns), len(table2.columns))
            self.assertEqual(len(table1.rows), len(table2.rows))

            # First three rows, compared one by one.
            for index in range(3):
                self.assertSequenceEqual(table1.rows[index], table2.rows[index])
示例#20
0
    def test_from_csv_row_limit_too_high(self):
        # With row_limit=200 the loaded table is expected to equal the full
        # reference table built from the fixture rows.
        expected = Table(self.rows, self.column_names, self.column_types)
        limited = Table.from_csv('examples/test.csv', row_limit=200)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(limited, expected.column_names)
        self.assertColumnTypes(limited, expected_types)
        self.assertRows(limited, expected.rows)
示例#21
0
    def test_from_csv_no_header(self):
        # Load a header-less file with warnings silenced, then check the
        # generated column names (a-f) and the expected column types.
        #
        # catch_warnings() snapshots the active warning filters and restores
        # them on exit, so filters installed elsewhere (test runner, -W
        # options) survive this test instead of being wiped by
        # resetwarnings().
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            table = Table.from_csv('examples/test_no_header.csv', header=False)

        self.assertColumnNames(table, ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertColumnTypes(table, [Number, Text, Boolean, Date, DateTime, TimeDelta])
示例#22
0
    def test_from_csv_file_like_object(self):
        # A table read from a UTF-8 text handle should equal the reference
        # table built from the fixture rows.
        expected = Table(self.rows, self.column_names, self.column_types)

        with io.open('examples/test.csv', encoding='utf-8') as handle:
            parsed = Table.from_csv(handle)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(parsed, expected.column_names)
        self.assertColumnTypes(parsed, expected_types)
        self.assertRows(parsed, expected.rows)
示例#23
0
    def test_from_csv_file_like_object(self):
        # Build the reference from the fixture rows, then read the same data
        # through an open UTF-8 handle and compare names, types and rows.
        reference = Table(self.rows, self.column_names, self.column_types)

        with io.open('examples/test.csv', encoding='utf-8') as stream:
            from_stream = Table.from_csv(stream)

        self.assertColumnNames(from_stream, reference.column_names)
        self.assertColumnTypes(
            from_stream,
            [Number, Text, Boolean, Date, DateTime, TimeDelta],
        )
        self.assertRows(from_stream, reference.rows)
示例#24
0
    def test_from_csv_default_type_tester(self):
        # With csvkit swapped in as agate.table's csv reader and no column
        # specification, the test expects three columns typed Number,
        # Number, Text.
        import csvkit
        from agate import table as table_module

        # Put the module's original reader back when the test ends so the
        # patch does not affect tests that run afterwards.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csvkit

        # Distinct name: the module alias is no longer rebound to a Table.
        loaded = Table.from_csv('examples/test.csv')

        self.assertEqual(len(loaded.columns), 3)
        self.assertIsInstance(loaded.columns[0].data_type, Number)
        self.assertIsInstance(loaded.columns[1].data_type, Number)
        self.assertIsInstance(loaded.columns[2].data_type, Text)
示例#25
0
    def test_from_csv_default_type_tester(self):
        # csvkit is installed as agate.table's csv reader for this test; with
        # nothing else specified the loaded table is expected to have three
        # columns typed Number, Number, Text.
        import csvkit
        from agate import table as table_module

        # Register the restore before patching so the swap cannot leak into
        # other tests, whatever the outcome of this one.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csvkit

        # Separate name for the result so the module alias stays intact.
        loaded = Table.from_csv('examples/test.csv')

        self.assertEqual(len(loaded.columns), 3)
        for index, kind in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[index].data_type, kind)
示例#26
0
    def test_from_csv_row_limit_no_header_columns(self):
        # Reference: the first two fixture rows. Under test: the header-less
        # file read with explicit names and row_limit=2.
        expected = Table(self.rows[:2], self.column_names, self.column_types)
        limited = Table.from_csv(
            'examples/test_no_header.csv',
            self.column_names,
            header=False,
            row_limit=2,
        )

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(limited, expected.column_names)
        self.assertColumnTypes(limited, expected_types)
        self.assertRows(limited, expected.rows)
示例#27
0
    def test_from_csv_no_header(self):
        # With csvkit swapped in as agate.table's csv reader, a header-less
        # file is expected to load as columns A, B, C typed Number, Number,
        # Text.
        import csvkit
        from agate import table as table_module

        # Put the module's original reader back when the test ends so the
        # patch does not affect tests that run afterwards.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csvkit

        output = Table.from_csv('examples/test_no_header.csv', None, header=False)

        self.assertEqual(len(output.columns), 3)
        self.assertSequenceEqual(output.column_names, ('A', 'B', 'C'))
        self.assertIsInstance(output.columns[0].data_type, Number)
        self.assertIsInstance(output.columns[1].data_type, Number)
        self.assertIsInstance(output.columns[2].data_type, Text)
示例#28
0
    def test_from_csv(self):
        # A table loaded from the CSV with explicit names and types must
        # match one built directly from the fixture rows.
        in_memory = Table(self.rows, self.column_names, self.column_types)
        from_file = Table.from_csv('examples/test.csv', self.column_names, self.column_types)

        self.assertSequenceEqual(in_memory.column_names, from_file.column_names)
        self.assertSequenceEqual(in_memory.column_types, from_file.column_types)

        self.assertEqual(len(in_memory.columns), len(from_file.columns))
        self.assertEqual(len(in_memory.rows), len(from_file.rows))

        # Compare the first three rows pairwise.
        for index in range(3):
            self.assertSequenceEqual(in_memory.rows[index], from_file.rows[index])
示例#29
0
    def test_from_csv_type_tester(self):
        # With csvkit swapped in as agate.table's csv reader and a default
        # TypeTester passed as the second argument, the test expects three
        # columns typed Number, Number, Text.
        import csvkit
        from agate import table as table_module

        # Put the module's original reader back when the test ends so the
        # patch does not affect tests that run afterwards.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csvkit

        tester = TypeTester()

        output = Table.from_csv('examples/test.csv', tester)

        self.assertEqual(len(output.columns), 3)
        self.assertIsInstance(output.columns[0].data_type, Number)
        self.assertIsInstance(output.columns[1].data_type, Number)
        self.assertIsInstance(output.columns[2].data_type, Text)
示例#30
0
    def test_from_csv_type_tester(self):
        # csvkit is installed as agate.table's csv reader for this test; a
        # default TypeTester is handed to from_csv and the result is expected
        # to have three columns typed Number, Number, Text.
        import csvkit
        from agate import table as table_module

        # Register the restore before patching so the swap cannot leak into
        # other tests, whatever the outcome of this one.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csvkit

        output = Table.from_csv('examples/test.csv', TypeTester())

        self.assertEqual(len(output.columns), 3)
        for index, kind in enumerate((Number, Number, Text)):
            self.assertIsInstance(output.columns[index].data_type, kind)
示例#31
0
    def test_from_csv(self):
        # The CSV-loaded table and the table built from the fixture rows, both
        # using self.columns, must agree on metadata, size and first rows.
        built = Table(self.rows, self.columns)
        loaded = Table.from_csv('examples/test.csv', self.columns)

        self.assertSequenceEqual(built.column_names, loaded.column_names)
        self.assertSequenceEqual(built.column_types, loaded.column_types)

        self.assertEqual(len(built.columns), len(loaded.columns))
        self.assertEqual(len(built.rows), len(loaded.rows))

        for position in (0, 1, 2):
            self.assertSequenceEqual(built.rows[position], loaded.rows[position])
示例#32
0
    def test_from_csv_skip_lines_cr(self):
        # Same check as the plain skip_lines test, but against the
        # 'test_cr.csv' fixture and with warnings silenced while both tables
        # are built.
        #
        # catch_warnings() snapshots the active warning filters and restores
        # them on exit, so filters installed elsewhere (test runner, -W
        # options) survive this test instead of being wiped by
        # resetwarnings().
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            table1 = Table(self.rows[1:], column_types=self.column_types)
            table2 = Table.from_csv('examples/test_cr.csv', header=False, skip_lines=2)

        self.assertColumnNames(table2, table1.column_names)
        self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])

        self.assertRows(table2, table1.rows)
示例#33
0
    def test_from_csv_file_like_object(self):
        # A table read from an open handle (binary on Python 2, UTF-8 text
        # otherwise) should equal the reference built from the fixture rows.
        table1 = Table(self.rows, self.column_names, self.column_types)

        if six.PY2:
            f = open('examples/test.csv', 'rb')
        else:
            f = io.open('examples/test.csv', encoding='utf-8')

        # Close the handle even when parsing raises, so a failing test does
        # not leak the open file.
        try:
            table2 = Table.from_csv(f)
        finally:
            f.close()

        self.assertColumnNames(table2, table1.column_names)
        self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])

        self.assertRows(table2, table1.rows)
示例#34
0
def join_csv_files(filelist, args):
    """Join the selected columns of several CSV files on a shared key.

    Each file in ``filelist`` (a mapping of base name to file) is loaded
    with ``args.delimiter`` and reduced to the ``args.join`` column plus
    ``args.fields``. Every column other than the join column is renamed to
    ``<column>.<base>``. When ``args.add_comm`` is truthy, the table from
    ``comm_fields_table`` for the file of the first entry in
    ``args.basenames`` is placed first. The tables are then joined left to
    right on ``args.join`` and the joined table is returned.
    """
    key = args.join
    wanted = [key] + args.fields

    pending = []
    if args.add_comm:
        # Communication columns are taken from the first base name's file.
        pending.append(comm_fields_table(filelist[args.basenames[0]], args))

    for base, source in filelist.items():
        selected = Table.from_csv(source, delimiter=args.delimiter).select(wanted)
        # Suffix everything except the join key with the file's base name.
        new_names = [
            name if name == key else f'{name}.{base}'
            for name in selected.column_names
        ]
        pending.append(selected.rename(new_names))

    def _join_on_key(left, right):
        return left.join(right, key)

    return reduce(_join_on_key, pending)
示例#35
0
    def test_from_csv_csvkit(self):
        # With csvkit swapped in as agate.table's csv reader, the CSV-loaded
        # table must match the one built from the fixture rows.
        import csvkit
        from agate import table as table_module

        # Put the module's original reader back when the test ends so the
        # patch does not affect tests that run afterwards.
        self.addCleanup(setattr, table_module, 'csv', table_module.csv)
        table_module.csv = csvkit

        table1 = Table(self.rows, self.columns)
        table2 = Table.from_csv('examples/test.csv', self.columns)

        self.assertSequenceEqual(table1.get_column_names(), table2.get_column_names())
        self.assertSequenceEqual(table1.get_column_types(), table2.get_column_types())

        self.assertEqual(len(table1.columns), len(table2.columns))
        self.assertEqual(len(table1.rows), len(table2.rows))

        # First three rows, compared one by one.
        for index in range(3):
            self.assertSequenceEqual(table1.rows[index], table2.rows[index])
示例#36
0
    def _get_data(self):
        """Load the configured CSV data source as a Table.

        The file name comes from ``self.settings["data_source"]["csv"]`` and
        is resolved under ``self.folder_path``. An optional ``"delimiter"``
        entry overrides the default comma, and the ``"key"`` entry is passed
        as ``row_names``.

        Raises:
            ValueError: if the data source settings contain no ``"csv"`` entry.
        """
        if "csv" not in self.settings["data_source"]:
            raise ValueError("Could not find any dataset")

        source = self.settings["data_source"]
        csv_path = "%s/%s" % (self.folder_path, source["csv"])

        # Comma unless the config supplies its own delimiter.
        separator = source["delimiter"] if "delimiter" in source else ","

        # Column types are detected by a tester configured with the sv_SE locale.
        type_tester = TypeTester(locale='sv_SE',)

        return Table.from_csv(
            csv_path,
            column_types=type_tester,
            delimiter=separator,
            row_names=self.settings["data_source"]["key"],
        )
示例#37
0
    def test_from_csv_sniff_limit_0(self):
        # With sniff_limit=0 the test expects the whole header line to come
        # back as one Text column.
        unsniffed = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=0)

        single_name = 'number|text|boolean|date|datetime|timedelta'
        self.assertColumnNames(unsniffed, [single_name])
        self.assertColumnTypes(unsniffed, [Text])
示例#38
0
def comm_fields_table(filepath, args):
    """Load ``filepath`` and return its join key, round and communication columns.

    The CSV is read with ``args.delimiter``, the 'setup' and 'online'
    computations from ``comm_adder_computation`` are applied, and the result
    is narrowed to ``args.join``, ``circuit.rounds``, ``setupComm`` and
    ``onlineComm``.
    """
    wanted = [args.join, "circuit.rounds", "setupComm", "onlineComm"]

    loaded = Table.from_csv(filepath, delimiter=args.delimiter)
    computations = [comm_adder_computation(phase) for phase in ('setup', 'online')]
    with_comm = loaded.compute(computations)

    return with_comm.select(wanted)
示例#39
0
    def test_from_csv_no_type_tester(self):
        # A tester created with limit=0 is expected to leave every one of the
        # six columns as Text.
        unlimited_text = TypeTester(limit=0)
        loaded = Table.from_csv('examples/test.csv', column_types=unlimited_text)

        expected_types = [Text, Text, Text, Text, Text, Text]
        self.assertColumnTypes(loaded, expected_types)
示例#40
0
    def test_from_csv_no_header(self):
        # A header-less file is expected to get the column names a-f and the
        # usual six column types.
        loaded = Table.from_csv('examples/test_no_header.csv', header=False)

        expected_names = ['a', 'b', 'c', 'd', 'e', 'f']
        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(loaded, expected_names)
        self.assertColumnTypes(loaded, expected_types)
示例#41
0
    def test_from_csv_no_header_columns(self):
        # Explicit column names supplied for a header-less file should be
        # used as given, with the usual six column types.
        loaded = Table.from_csv(
            'examples/test_no_header.csv',
            self.column_names,
            header=False,
        )

        self.assertColumnNames(loaded, self.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
示例#42
0
    def test_from_csv_no_header(self):
        # Read the header-less fixture and check the generated names (a-f)
        # and the column types.
        result = Table.from_csv('examples/test_no_header.csv', header=False)

        self.assertColumnNames(
            result,
            ['a', 'b', 'c', 'd', 'e', 'f'],
        )
        self.assertColumnTypes(
            result,
            [Number, Text, Boolean, Date, DateTime, TimeDelta],
        )
示例#43
0
    def test_from_csv_no_type_tester(self):
        # The tester is built with limit=0; the test expects all six columns
        # to be typed Text.
        result = Table.from_csv('examples/test.csv', column_types=TypeTester(limit=0))

        self.assertColumnTypes(result, [Text] * 6)