Exemplo n.º 1
0
    def test_from_csv_builtin(self):
        """Load the example CSV with stdlib ``csv`` assigned to ``agate.table.csv``.

        When ``six.PY2`` is true the load is expected to raise
        ``UnicodeDecodeError``; otherwise the loaded table must have three
        columns.
        """
        import csv
        from agate import table as table_module

        # Point the ``csv`` attribute of ``agate.table`` at the stdlib module.
        table_module.csv = csv

        if not six.PY2:
            loaded = Table.from_csv('examples/test.csv', self.columns)
            self.assertEqual(len(loaded.columns), 3)
            return

        with self.assertRaises(UnicodeDecodeError):
            Table.from_csv('examples/test.csv', self.columns)
Exemplo n.º 2
0
    def test_from_csv_builtin(self):
        """Read the example CSV after assigning stdlib ``csv`` to ``agate.table.csv``.

        When ``six.PY2`` is true the read must raise ``UnicodeDecodeError``;
        otherwise the loaded table must expose three columns.
        """
        import csv
        from agate import table as agate_table

        agate_table.csv = csv

        csv_path = 'examples/test.csv'

        if six.PY2:
            with self.assertRaises(UnicodeDecodeError):
                Table.from_csv(csv_path, self.columns)
        else:
            loaded = Table.from_csv(csv_path, self.columns)
            self.assertEqual(len(loaded.columns), 3)
Exemplo n.º 3
0
    def test_from_csv_file_like_object(self):
        """Check that loading from an open file handle matches loading by path."""
        csv_path = 'examples/test.csv'
        from_path = Table.from_csv(csv_path, self.columns)

        with open(csv_path) as handle:
            from_handle = Table.from_csv(handle, self.columns)

            self.assertSequenceEqual(from_path.column_names, from_handle.column_names)
            self.assertSequenceEqual(from_path.column_types, from_handle.column_types)

            self.assertEqual(len(from_path.columns), len(from_handle.columns))
            self.assertEqual(len(from_path.rows), len(from_handle.rows))

            # Compare the first three rows one by one.
            for index in range(3):
                self.assertSequenceEqual(from_path.rows[index], from_handle.rows[index])
Exemplo n.º 4
0
    def test_from_csv_file_like_object(self):
        """Check that a table loaded from a file handle equals one loaded by path.

        Both loads receive ``self.column_names`` and ``self.column_types``.
        """
        csv_path = 'examples/test.csv'
        from_path = Table.from_csv(csv_path, self.column_names, self.column_types)

        with open(csv_path) as handle:
            from_handle = Table.from_csv(handle, self.column_names, self.column_types)

            self.assertSequenceEqual(from_path.column_names, from_handle.column_names)
            self.assertSequenceEqual(from_path.column_types, from_handle.column_types)

            self.assertEqual(len(from_path.columns), len(from_handle.columns))
            self.assertEqual(len(from_path.rows), len(from_handle.rows))

            # Compare the first three rows one by one.
            for index in range(3):
                self.assertSequenceEqual(from_path.rows[index], from_handle.rows[index])
Exemplo n.º 5
0
    def test_from_csv_type_tester(self):
        """Load the example CSV with a TypeTester that forces 'number' to Text."""
        forced_types = {'number': Text()}
        type_tester = TypeTester(force=forced_types)

        loaded = Table.from_csv('examples/test.csv', column_types=type_tester)

        expected_types = [Text, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
Exemplo n.º 6
0
    def test_from_csv_default_type_tester(self):
        """Load the example CSV with no explicit column types and check the result.

        The table must have three columns whose data types are Number, Number
        and Text, in that order.
        """
        loaded = Table.from_csv('examples/test.csv')

        self.assertEqual(len(loaded.columns), 3)

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 7
0
    def test_from_csv_default_type_tester(self):
        """Check the column data types of the example CSV loaded without column types.

        Expects three columns typed Number, Number and Text respectively.
        """
        loaded = Table.from_csv('examples/test.csv')
        columns = loaded.columns

        self.assertEqual(len(columns), 3)

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 8
0
 def _get_promise_table(self):
     """Get the answers from the voting advice application.

     Reads "yle-vaalikone-2015.csv", keeps the rows whose "valittu" value
     equals 1 and selects the "nimi", "puolue" and ``self.question`` columns.
     The result is passed to ``rename_column`` together with
     ``self.question`` and "promise" (presumably renaming that column;
     confirm against the helper) and stored on ``self.promise_table``.
     """
     # Call form of print: valid syntax on Python 3 and prints the same text
     # on Python 2 for a single string argument.
     print("Get election machine data (YLE)")
     table = (
         Table.from_csv("yle-vaalikone-2015.csv")
         .where(lambda row: row["valittu"] == 1)
         .select(["nimi", "puolue", self.question])
     )
     self.promise_table = rename_column(table, self.question, "promise")
Exemplo n.º 9
0
    def test_from_csv_no_header_columns(self):
        """Load the header-less example CSV with explicit column names.

        The loaded table must carry ``self.column_names`` and the expected
        column types.
        """
        loaded = Table.from_csv(
            'examples/test_no_header.csv', self.column_names, header=False)

        self.assertColumnNames(loaded, self.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
Exemplo n.º 10
0
    def test_from_csv_no_header_type_inference(self):
        """Load 'examples/test_no_header.csv' with header=False and check names and types.

        Expects three columns named 'A', 'B', 'C' with data types Number,
        Number and Text.
        """
        loaded = Table.from_csv('examples/test_no_header.csv', header=False)

        self.assertEqual(len(loaded.columns), 3)
        self.assertSequenceEqual(loaded.column_names, ('A', 'B', 'C'))

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 11
0
    def test_from_csv_skip_lines(self):
        """Load the example CSV with header=False and skip_lines=2.

        The result is compared against a table built from ``self.rows[1:]``
        with ``self.column_types``.
        """
        expected = Table(self.rows[1:], column_types=self.column_types)
        loaded = Table.from_csv('examples/test.csv', header=False, skip_lines=2)

        self.assertColumnNames(loaded, expected.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)

        self.assertRows(loaded, expected.rows)
Exemplo n.º 12
0
    def test_from_fixed(self):
        """Compare ``Table.from_fixed`` output with the converted CSV.

        'examples/testfixed' is loaded with the schema file
        'examples/testfixed_schema.csv' and must match the table loaded from
        'examples/testfixed_converted.csv' in names, type classes and rows.
        """
        expected = Table.from_csv('examples/testfixed_converted.csv')
        loaded = Table.from_fixed('examples/testfixed', 'examples/testfixed_schema.csv')

        self.assertColumnNames(loaded, expected.column_names)

        # Only the classes of the column types are compared.
        expected_types = [type(column_type) for column_type in expected.column_types]
        self.assertColumnTypes(loaded, expected_types)

        self.assertRows(loaded, expected.rows)
Exemplo n.º 13
0
    def test_from_csv_type_tester(self):
        """Load the example CSV with a TypeTester forcing the 'number' column to Text."""
        type_tester = TypeTester(force={'number': Text()})

        loaded = Table.from_csv('examples/test.csv', column_types=type_tester)

        expected_types = [Text, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
Exemplo n.º 14
0
    def test_from_csv_sniff_limit(self):
        """Load 'examples/test_csv_sniff.csv' with sniff_limit=None.

        The result must match a table built from ``self.rows``,
        ``self.column_names`` and ``self.column_types``.
        """
        expected = Table(self.rows, self.column_names, self.column_types)
        loaded = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=None)

        self.assertColumnNames(loaded, expected.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)

        self.assertRows(loaded, expected.rows)
Exemplo n.º 15
0
    def test_from_csv_no_header_type_inference(self):
        """Load the header-less example CSV, passing None as the second argument.

        Expects three columns named 'A', 'B', 'C' with data types Number,
        Number and Text.
        """
        loaded = Table.from_csv('examples/test_no_header.csv', None, header=False)

        self.assertEqual(len(loaded.columns), 3)
        self.assertSequenceEqual(loaded.column_names, ('A', 'B', 'C'))

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 16
0
    def test_from_csv_sniff_limit(self):
        """Check that loading with sniff_limit=None reproduces the reference table.

        The reference table is built from ``self.rows``, ``self.column_names``
        and ``self.column_types``.
        """
        reference = Table(self.rows, self.column_names, self.column_types)
        sniffed = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=None)

        self.assertColumnNames(sniffed, reference.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(sniffed, expected_types)

        self.assertRows(sniffed, reference.rows)
Exemplo n.º 17
0
    def test_from_fixed(self):
        """Check ``Table.from_fixed`` against the table loaded from the converted CSV.

        Column names, column type classes and rows must all match.
        """
        reference = Table.from_csv("examples/testfixed_converted.csv")
        fixed = Table.from_fixed("examples/testfixed", "examples/testfixed_schema.csv")

        self.assertColumnNames(fixed, reference.column_names)

        # Only the classes of the column types are compared.
        type_classes = [type(column_type) for column_type in reference.column_types]
        self.assertColumnTypes(fixed, type_classes)

        self.assertRows(fixed, reference.rows)
Exemplo n.º 18
0
    def test_from_csv_skip_lines_sequence(self):
        """Load the example CSV with skip_lines=(1, 3).

        The result is compared against a one-row table built from
        ``self.rows[1]``.
        """
        expected = Table(
            [self.rows[1]],
            column_names=self.column_names,
            column_types=self.column_types)
        loaded = Table.from_csv('examples/test.csv', skip_lines=(1, 3))

        self.assertColumnNames(loaded, expected.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)

        self.assertRows(loaded, expected.rows)
Exemplo n.º 19
0
    def test_from_csv_file_like_object(self):
        """With csvkit assigned to ``agate.table.csv``, compare handle and path loads."""
        import csvkit
        from agate import table as agate_table

        # Point the ``csv`` attribute of ``agate.table`` at csvkit.
        agate_table.csv = csvkit

        csv_path = 'examples/test.csv'
        from_path = Table.from_csv(csv_path, self.columns)
        with open(csv_path) as handle:
            from_handle = Table.from_csv(handle, self.columns)

            self.assertSequenceEqual(from_path.get_column_names(), from_handle.get_column_names())
            self.assertSequenceEqual(from_path.get_column_types(), from_handle.get_column_types())

            self.assertEqual(len(from_path.columns), len(from_handle.columns))
            self.assertEqual(len(from_path.rows), len(from_handle.rows))

            # Compare the first three rows one by one.
            for index in range(3):
                self.assertSequenceEqual(from_path.rows[index], from_handle.rows[index])
Exemplo n.º 20
0
    def test_from_csv_row_limit_too_high(self):
        """Load the example CSV with row_limit=200.

        The result must match a table built from all of ``self.rows``.
        """
        expected = Table(self.rows, self.column_names, self.column_types)
        loaded = Table.from_csv('examples/test.csv', row_limit=200)

        self.assertColumnNames(loaded, expected.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)

        self.assertRows(loaded, expected.rows)
Exemplo n.º 21
0
    def test_from_csv_no_header(self):
        """Load the header-less example CSV with warnings silenced.

        The loaded table must have columns named 'a' through 'f' and the
        expected column types.
        """
        # catch_warnings() restores the previous filter state on exit.
        # resetwarnings() would instead discard every filter, including ones
        # configured by the test runner or the -W command-line option.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            table = Table.from_csv('examples/test_no_header.csv', header=False)

        self.assertColumnNames(table, ['a', 'b', 'c', 'd', 'e', 'f'])
        self.assertColumnTypes(table, [Number, Text, Boolean, Date, DateTime, TimeDelta])
Exemplo n.º 22
0
    def test_from_csv_file_like_object(self):
        """Load the example CSV from a UTF-8 text handle.

        The result is compared against a table built from ``self.rows``.
        """
        expected = Table(self.rows, self.column_names, self.column_types)

        with io.open('examples/test.csv', encoding='utf-8') as handle:
            loaded = Table.from_csv(handle)

        self.assertColumnNames(loaded, expected.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)

        self.assertRows(loaded, expected.rows)
Exemplo n.º 23
0
    def test_from_csv_file_like_object(self):
        """Check that ``Table.from_csv`` accepts an open UTF-8 text handle.

        The loaded table must match the reference built from ``self.rows``,
        ``self.column_names`` and ``self.column_types``.
        """
        reference = Table(self.rows, self.column_names, self.column_types)

        with io.open('examples/test.csv', encoding='utf-8') as stream:
            from_stream = Table.from_csv(stream)

        self.assertColumnNames(from_stream, reference.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(from_stream, expected_types)

        self.assertRows(from_stream, reference.rows)
Exemplo n.º 24
0
    def test_from_csv_default_type_tester(self):
        """With csvkit assigned to ``agate.table.csv``, load the CSV without column types.

        Expects three columns with data types Number, Number and Text.
        """
        import csvkit
        from agate import table as agate_table

        # Point the ``csv`` attribute of ``agate.table`` at csvkit.
        agate_table.csv = csvkit

        loaded = Table.from_csv('examples/test.csv')

        self.assertEqual(len(loaded.columns), 3)

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 25
0
    def test_from_csv_default_type_tester(self):
        """Check column data types of the example CSV loaded via csvkit without column types.

        ``agate.table.csv`` is set to csvkit first; the table must then have
        three columns typed Number, Number and Text.
        """
        import csvkit
        from agate import table as table_module

        table_module.csv = csvkit

        loaded = Table.from_csv('examples/test.csv')

        self.assertEqual(len(loaded.columns), 3)

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 26
0
    def test_from_csv_row_limit_no_header_columns(self):
        """Load the header-less CSV with explicit names and row_limit=2.

        The result must match a table built from ``self.rows[:2]``.
        """
        expected = Table(self.rows[:2], self.column_names, self.column_types)
        loaded = Table.from_csv(
            'examples/test_no_header.csv',
            self.column_names,
            header=False,
            row_limit=2)

        self.assertColumnNames(loaded, expected.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)

        self.assertRows(loaded, expected.rows)
Exemplo n.º 27
0
    def test_from_csv_no_header(self):
        """With csvkit assigned to ``agate.table.csv``, load the header-less CSV.

        Expects three columns named 'A', 'B', 'C' with data types Number,
        Number and Text.
        """
        import csvkit
        from agate import table as agate_table

        # Point the ``csv`` attribute of ``agate.table`` at csvkit.
        agate_table.csv = csvkit

        loaded = Table.from_csv('examples/test_no_header.csv', None, header=False)

        self.assertEqual(len(loaded.columns), 3)
        self.assertSequenceEqual(loaded.column_names, ('A', 'B', 'C'))

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 28
0
    def test_from_csv(self):
        """Check that the example CSV loads to the same table as the in-memory rows.

        Both tables are given ``self.column_names`` and ``self.column_types``.
        """
        expected = Table(self.rows, self.column_names, self.column_types)
        loaded = Table.from_csv('examples/test.csv', self.column_names, self.column_types)

        self.assertSequenceEqual(expected.column_names, loaded.column_names)
        self.assertSequenceEqual(expected.column_types, loaded.column_types)

        self.assertEqual(len(expected.columns), len(loaded.columns))
        self.assertEqual(len(expected.rows), len(loaded.rows))

        # Compare the first three rows one by one.
        for index in range(3):
            self.assertSequenceEqual(expected.rows[index], loaded.rows[index])
Exemplo n.º 29
0
    def test_from_csv_type_tester(self):
        """With csvkit assigned to ``agate.table.csv``, load the CSV with a default TypeTester.

        Expects three columns with data types Number, Number and Text.
        """
        import csvkit
        from agate import table as agate_table

        # Point the ``csv`` attribute of ``agate.table`` at csvkit.
        agate_table.csv = csvkit

        type_tester = TypeTester()

        loaded = Table.from_csv('examples/test.csv', type_tester)

        self.assertEqual(len(loaded.columns), 3)

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 30
0
    def test_from_csv_type_tester(self):
        """Check column data types when a default TypeTester is passed to ``from_csv``.

        ``agate.table.csv`` is set to csvkit first; the table must then have
        three columns typed Number, Number and Text.
        """
        import csvkit
        from agate import table as table_module

        table_module.csv = csvkit

        type_tester = TypeTester()

        loaded = Table.from_csv('examples/test.csv', type_tester)

        self.assertEqual(len(loaded.columns), 3)

        for position, expected in enumerate((Number, Number, Text)):
            self.assertIsInstance(loaded.columns[position].data_type, expected)
Exemplo n.º 31
0
    def test_from_csv(self):
        """Check that the example CSV loads to the same table as the in-memory rows.

        Both tables are given ``self.columns``.
        """
        expected = Table(self.rows, self.columns)
        loaded = Table.from_csv('examples/test.csv', self.columns)

        self.assertSequenceEqual(expected.column_names, loaded.column_names)
        self.assertSequenceEqual(expected.column_types, loaded.column_types)

        self.assertEqual(len(expected.columns), len(loaded.columns))
        self.assertEqual(len(expected.rows), len(loaded.rows))

        # Compare the first three rows one by one.
        for index in range(3):
            self.assertSequenceEqual(expected.rows[index], loaded.rows[index])
Exemplo n.º 32
0
    def test_from_csv_skip_lines_cr(self):
        """Load 'examples/test_cr.csv' with header=False and skip_lines=2, warnings silenced.

        The result is compared against a table built from ``self.rows[1:]``
        with ``self.column_types``.
        """
        # catch_warnings() restores the previous filter state on exit.
        # resetwarnings() would instead discard every filter, including ones
        # configured by the test runner or the -W command-line option.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            table1 = Table(self.rows[1:], column_types=self.column_types)
            table2 = Table.from_csv('examples/test_cr.csv', header=False, skip_lines=2)

        self.assertColumnNames(table2, table1.column_names)
        self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])

        self.assertRows(table2, table1.rows)
Exemplo n.º 33
0
    def test_from_csv_file_like_object(self):
        """Load the example CSV from an already-open file object.

        The handle is opened in binary mode when ``six.PY2`` is true and as
        UTF-8 text otherwise. The loaded table must match the one built from
        ``self.rows``, ``self.column_names`` and ``self.column_types``.
        """
        table1 = Table(self.rows, self.column_names, self.column_types)

        if six.PY2:
            f = open('examples/test.csv', 'rb')
        else:
            f = io.open('examples/test.csv', encoding='utf-8')

        # The context manager closes the handle even if from_csv raises.
        with f:
            table2 = Table.from_csv(f)

        self.assertColumnNames(table2, table1.column_names)
        self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])

        self.assertRows(table2, table1.rows)
Exemplo n.º 34
0
def join_csv_files(filelist, args):
    """Load every CSV in *filelist* and join the tables on ``args.join``.

    Each file is read with ``args.delimiter`` and narrowed to the join column
    plus ``args.fields``. Every column other than the join column is renamed
    to ``<column>.<base>``, where ``<base>`` is the file's key in *filelist*.
    When ``args.add_comm`` is set, the table returned by ``comm_fields_table``
    for the file keyed by ``args.basenames[0]`` goes first in the join chain.

    Returns the result of joining all collected tables from left to right.
    """
    join_key = args.join
    wanted = [join_key] + args.fields
    collected = []

    if args.add_comm:
        collected.append(comm_fields_table(filelist[args.basenames[0]], args))

    for base, path in filelist.items():
        selected = Table.from_csv(path, delimiter=args.delimiter).select(wanted)
        # Suffix every non-join column with the file's key.
        new_names = [name if name == join_key else f'{name}.{base}'
                     for name in selected.column_names]
        collected.append(selected.rename(new_names))

    def _join_pair(left, right):
        return left.join(right, join_key)

    return reduce(_join_pair, collected)
Exemplo n.º 35
0
    def test_from_csv_csvkit(self):
        """With csvkit assigned to ``agate.table.csv``, compare the CSV load with the in-memory table."""
        import csvkit
        from agate import table as agate_table

        # Point the ``csv`` attribute of ``agate.table`` at csvkit.
        agate_table.csv = csvkit

        expected = Table(self.rows, self.columns)
        loaded = Table.from_csv('examples/test.csv', self.columns)

        self.assertSequenceEqual(expected.get_column_names(), loaded.get_column_names())
        self.assertSequenceEqual(expected.get_column_types(), loaded.get_column_types())

        self.assertEqual(len(expected.columns), len(loaded.columns))
        self.assertEqual(len(expected.rows), len(loaded.rows))

        # Compare the first three rows one by one.
        for index in range(3):
            self.assertSequenceEqual(expected.rows[index], loaded.rows[index])
Exemplo n.º 36
0
    def _get_data(self):
        """Load the dataset described by ``self.settings["data_source"]``.

        When the data source has a "csv" entry, the file at
        ``<self.folder_path>/<csv>`` is read with ``Table.from_csv``. The call
        uses a ``TypeTester(locale='sv_SE')`` for the column types, the
        configured "delimiter" (a comma when none is set) and the "key" entry
        as ``row_names``.

        Raises:
            ValueError: if the data source has no "csv" entry.
        """
        source = self.settings["data_source"]
        if "csv" not in source:
            raise ValueError("Could not find any dataset")

        file_path = "%s/%s" % (self.folder_path, source["csv"])

        # The CSV delimiter is an optional config setting; default to a comma.
        delimiter = source["delimiter"] if "delimiter" in source else ","

        # Auto-detect column types.
        tester = TypeTester(locale='sv_SE')

        return Table.from_csv(
            file_path,
            column_types=tester,
            delimiter=delimiter,
            row_names=source["key"],
        )
Exemplo n.º 37
0
    def test_from_csv_sniff_limit_0(self):
        """Load 'examples/test_csv_sniff.csv' with sniff_limit=0.

        Expects a single Text column named
        'number|text|boolean|date|datetime|timedelta'.
        """
        loaded = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=0)

        expected_names = ['number|text|boolean|date|datetime|timedelta']
        self.assertColumnNames(loaded, expected_names)
        self.assertColumnTypes(loaded, [Text])
Exemplo n.º 38
0
def comm_fields_table(filepath, args):
    """Load *filepath* and return the join column plus the communication columns.

    The CSV is read with ``args.delimiter`` and extended with the computations
    from ``comm_adder_computation('setup')`` and
    ``comm_adder_computation('online')``. It is then narrowed to ``args.join``,
    "circuit.rounds", "setupComm" and "onlineComm".
    """
    wanted = [args.join, "circuit.rounds", "setupComm", "onlineComm"]
    loaded = Table.from_csv(filepath, delimiter=args.delimiter)
    computations = [comm_adder_computation(kind) for kind in ('setup', 'online')]
    return loaded.compute(computations).select(wanted)
Exemplo n.º 39
0
    def test_from_csv_no_type_tester(self):
        """Load the example CSV with ``TypeTester(limit=0)`` and expect six Text columns."""
        type_tester = TypeTester(limit=0)

        loaded = Table.from_csv('examples/test.csv', column_types=type_tester)

        expected_types = [Text] * 6
        self.assertColumnTypes(loaded, expected_types)
Exemplo n.º 40
0
    def test_from_csv_no_header(self):
        """Load the header-less example CSV and check the column names and types.

        Expects columns named 'a' through 'f'.
        """
        loaded = Table.from_csv('examples/test_no_header.csv', header=False)

        expected_names = ['a', 'b', 'c', 'd', 'e', 'f']
        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

        self.assertColumnNames(loaded, expected_names)
        self.assertColumnTypes(loaded, expected_types)
Exemplo n.º 41
0
    def test_from_csv_no_header_columns(self):
        """Load the header-less example CSV with ``self.column_names`` supplied."""
        loaded = Table.from_csv(
            'examples/test_no_header.csv',
            self.column_names,
            header=False)

        self.assertColumnNames(loaded, self.column_names)

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(loaded, expected_types)
Exemplo n.º 42
0
    def test_from_csv_no_header(self):
        """Check names and types of 'examples/test_no_header.csv' loaded with header=False.

        The columns must be named 'a' through 'f'.
        """
        headerless = Table.from_csv('examples/test_no_header.csv', header=False)

        self.assertColumnNames(headerless, list('abcdef'))

        expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
        self.assertColumnTypes(headerless, expected_types)
Exemplo n.º 43
0
    def test_from_csv_no_type_tester(self):
        """Check that a ``TypeTester`` with limit=0 yields Text for all six columns."""
        zero_limit_tester = TypeTester(limit=0)

        loaded = Table.from_csv('examples/test.csv', column_types=zero_limit_tester)

        expected_types = [Text for _ in range(6)]
        self.assertColumnTypes(loaded, expected_types)