def test_from_csv_builtin(self):
    # Point agate.table at the standard-library csv module, then load the
    # example file: Python 2 must raise UnicodeDecodeError, otherwise the
    # loaded table must have three columns.
    import csv
    from agate import table as table_module

    table_module.csv = csv

    if not six.PY2:
        loaded = Table.from_csv('examples/test.csv', self.columns)
        self.assertEqual(len(loaded.columns), 3)
        return

    with self.assertRaises(UnicodeDecodeError):
        Table.from_csv('examples/test.csv', self.columns)
def test_from_csv_builtin(self):
    import csv
    from agate import table as agate_table

    # Route CSV reading through the standard-library module.
    agate_table.csv = csv
    source = 'examples/test.csv'

    if six.PY2:
        # The load itself is expected to fail with a decode error here.
        with self.assertRaises(UnicodeDecodeError):
            Table.from_csv(source, self.columns)
    else:
        result = Table.from_csv(source, self.columns)
        self.assertEqual(len(result.columns), 3)
def test_from_csv_file_like_object(self):
    # Loading from a path and from an open file handle must give tables
    # with the same names, types, sizes and first three rows.
    path = 'examples/test.csv'
    from_path = Table.from_csv(path, self.columns)

    with open(path) as handle:
        from_handle = Table.from_csv(handle, self.columns)

    self.assertSequenceEqual(from_path.column_names, from_handle.column_names)
    self.assertSequenceEqual(from_path.column_types, from_handle.column_types)
    self.assertEqual(len(from_path.columns), len(from_handle.columns))
    self.assertEqual(len(from_path.rows), len(from_handle.rows))

    for index in range(3):
        self.assertSequenceEqual(from_path.rows[index], from_handle.rows[index])
def test_from_csv_file_like_object(self):
    # A table read from a filename is the reference; the same file read
    # through an open handle must match it.
    reference = Table.from_csv('examples/test.csv', self.column_names, self.column_types)

    with open('examples/test.csv') as fh:
        candidate = Table.from_csv(fh, self.column_names, self.column_types)

    for attribute in ('column_names', 'column_types'):
        self.assertSequenceEqual(getattr(reference, attribute), getattr(candidate, attribute))

    for attribute in ('columns', 'rows'):
        self.assertEqual(len(getattr(reference, attribute)), len(getattr(candidate, attribute)))

    # Only the first three rows are compared value by value.
    for row_number in (0, 1, 2):
        self.assertSequenceEqual(reference.rows[row_number], candidate.rows[row_number])
def test_from_csv_type_tester(self):
    # Force the 'number' column to Text and check the resulting column types.
    forced_types = {'number': Text()}
    loaded = Table.from_csv('examples/test.csv', column_types=TypeTester(force=forced_types))

    expected_types = [Text, Text, Boolean, Date, DateTime, TimeDelta]
    self.assertColumnTypes(loaded, expected_types)
def test_from_csv_default_type_tester(self):
    # No column specification is passed; the test expects three columns
    # typed Number, Number, Text.
    loaded = Table.from_csv('examples/test.csv')

    self.assertEqual(len(loaded.columns), 3)

    for position, expected_type in enumerate((Number, Number, Text)):
        self.assertIsInstance(loaded.columns[position].data_type, expected_type)
def _get_promise_table(self):
    """ Get the answers from the voting advice application.

    Reads "yle-vaalikone-2015.csv", keeps only the rows whose "valittu"
    value equals 1, selects the "nimi" and "puolue" columns together with
    the column named by ``self.question``, and stores the result on
    ``self.promise_table`` with the question column renamed to "promise".
    """
    # Call form of print works on both Python 2 and Python 3; the
    # statement form is a SyntaxError on Python 3.
    print("Get election machine data (YLE)")

    answers = Table.from_csv("yle-vaalikone-2015.csv")
    # NOTE(review): "valittu" presumably flags elected candidates - confirm.
    chosen = answers.where(lambda row: row["valittu"] == 1)
    table = chosen.select(["nimi", "puolue", self.question])

    self.promise_table = rename_column(table, self.question, "promise")
def test_from_csv_no_header_columns(self):
    # Headerless file loaded with explicit column names: the names must be
    # applied and the column types must match the expected list.
    loaded = Table.from_csv(
        'examples/test_no_header.csv',
        self.column_names,
        header=False
    )

    self.assertColumnNames(loaded, self.column_names)

    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]
    self.assertColumnTypes(loaded, expected_types)
def test_from_csv_no_header_type_inference(self):
    # Headerless file with no column specification: the test expects
    # three columns named 'A', 'B', 'C' typed Number, Number, Text.
    loaded = Table.from_csv('examples/test_no_header.csv', header=False)

    self.assertEqual(len(loaded.columns), 3)
    self.assertSequenceEqual(loaded.column_names, ('A', 'B', 'C'))

    for index, expected_type in enumerate((Number, Number, Text)):
        self.assertIsInstance(loaded.columns[index].data_type, expected_type)
def test_from_csv_skip_lines(self):
    # With header=False and skip_lines=2 the loaded table is expected to
    # match a table built from every fixture row except the first.
    expected = Table(self.rows[1:], column_types=self.column_types)
    loaded = Table.from_csv('examples/test.csv', header=False, skip_lines=2)

    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

    self.assertColumnNames(loaded, expected.column_names)
    self.assertColumnTypes(loaded, expected_types)
    self.assertRows(loaded, expected.rows)
def test_from_fixed(self):
    # The fixed-width file read with its schema must match the already
    # converted CSV in names, type classes and rows.
    reference = Table.from_csv('examples/testfixed_converted.csv')
    fixed = Table.from_fixed('examples/testfixed', 'examples/testfixed_schema.csv')

    self.assertColumnNames(fixed, reference.column_names)
    self.assertColumnTypes(fixed, list(map(type, reference.column_types)))
    self.assertRows(fixed, reference.rows)
def test_from_csv_type_tester(self):
    # A TypeTester that forces 'number' to Text is passed as column_types;
    # the first column is then expected to be Text.
    type_tester = TypeTester(force={'number': Text()})
    result = Table.from_csv('examples/test.csv', column_types=type_tester)

    self.assertColumnTypes(
        result,
        [Text, Text, Boolean, Date, DateTime, TimeDelta]
    )
def test_from_csv_sniff_limit(self):
    # Loading the sniff fixture with sniff_limit=None is expected to give
    # the same names, types and rows as the in-memory reference table.
    expected = Table(self.rows, self.column_names, self.column_types)
    sniffed = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=None)

    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

    self.assertColumnNames(sniffed, expected.column_names)
    self.assertColumnTypes(sniffed, expected_types)
    self.assertRows(sniffed, expected.rows)
def test_from_csv_no_header_type_inference(self):
    # None is passed explicitly as the second positional argument and the
    # file has no header row; expect columns 'A', 'B', 'C' typed
    # Number, Number, Text.
    result = Table.from_csv('examples/test_no_header.csv', None, header=False)

    self.assertEqual(len(result.columns), 3)
    self.assertSequenceEqual(result.column_names, ('A', 'B', 'C'))

    expected_types = (Number, Number, Text)
    for position in range(3):
        self.assertIsInstance(result.columns[position].data_type, expected_types[position])
def test_from_fixed(self):
    # Compare a fixed-width load against the equivalent converted CSV.
    from_csv = Table.from_csv("examples/testfixed_converted.csv")
    from_fixed = Table.from_fixed("examples/testfixed", "examples/testfixed_schema.csv")

    self.assertColumnNames(from_fixed, from_csv.column_names)

    # Only the classes of the data types are compared, not the instances.
    type_classes = [type(data_type) for data_type in from_csv.column_types]
    self.assertColumnTypes(from_fixed, type_classes)

    self.assertRows(from_fixed, from_csv.rows)
def test_from_csv_skip_lines_sequence(self):
    # skip_lines is given as the sequence (1, 3); the loaded table is
    # expected to contain only the second fixture row.
    expected = Table(
        [self.rows[1]],
        column_names=self.column_names,
        column_types=self.column_types
    )
    loaded = Table.from_csv('examples/test.csv', skip_lines=(1, 3))

    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

    self.assertColumnNames(loaded, expected.column_names)
    self.assertColumnTypes(loaded, expected_types)
    self.assertRows(loaded, expected.rows)
def test_from_csv_file_like_object(self):
    # With csvkit installed as agate.table's csv module, a load from a
    # path and a load from an open handle must produce matching tables.
    import csvkit
    from agate import table as table_module

    table_module.csv = csvkit

    path = 'examples/test.csv'
    by_path = Table.from_csv(path, self.columns)

    with open(path) as handle:
        by_handle = Table.from_csv(handle, self.columns)

    self.assertSequenceEqual(by_path.get_column_names(), by_handle.get_column_names())
    self.assertSequenceEqual(by_path.get_column_types(), by_handle.get_column_types())
    self.assertEqual(len(by_path.columns), len(by_handle.columns))
    self.assertEqual(len(by_path.rows), len(by_handle.rows))

    for index in range(3):
        self.assertSequenceEqual(by_path.rows[index], by_handle.rows[index])
def test_from_csv_row_limit_too_high(self):
    # row_limit=200 is passed; the loaded table is expected to equal the
    # reference table built from all fixture rows.
    expected = Table(self.rows, self.column_names, self.column_types)
    limited = Table.from_csv('examples/test.csv', row_limit=200)

    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

    self.assertColumnNames(limited, expected.column_names)
    self.assertColumnTypes(limited, expected_types)
    self.assertRows(limited, expected.rows)
def test_from_csv_no_header(self):
    # Load a headerless file with warnings silenced, then check the
    # resulting column names ('a'..'f') and column types.
    #
    # catch_warnings() restores the previous filter state on exit;
    # warnings.resetwarnings() would instead discard every filter
    # configured elsewhere in the process.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        table = Table.from_csv('examples/test_no_header.csv', header=False)

    self.assertColumnNames(table, ['a', 'b', 'c', 'd', 'e', 'f'])
    self.assertColumnTypes(table, [Number, Text, Boolean, Date, DateTime, TimeDelta])
def test_from_csv_file_like_object(self):
    # A table loaded from a UTF-8 text handle must match the in-memory
    # reference table in names, types and rows.
    expected = Table(self.rows, self.column_names, self.column_types)

    with io.open('examples/test.csv', encoding='utf-8') as handle:
        loaded = Table.from_csv(handle)

    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

    self.assertColumnNames(loaded, expected.column_names)
    self.assertColumnTypes(loaded, expected_types)
    self.assertRows(loaded, expected.rows)
def test_from_csv_default_type_tester(self):
    # csvkit is installed as agate.table's csv module; loading without any
    # column specification is expected to give Number, Number, Text.
    import csvkit
    from agate import table as table_module

    table_module.csv = csvkit

    loaded = Table.from_csv('examples/test.csv')

    self.assertEqual(len(loaded.columns), 3)

    for index, expected_type in enumerate((Number, Number, Text)):
        self.assertIsInstance(loaded.columns[index].data_type, expected_type)
def test_from_csv_row_limit_no_header_columns(self):
    # Headerless file, explicit column names, row_limit=2: the result is
    # expected to match a table built from the first two fixture rows.
    expected = Table(self.rows[:2], self.column_names, self.column_types)
    limited = Table.from_csv(
        'examples/test_no_header.csv',
        self.column_names,
        header=False,
        row_limit=2
    )

    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

    self.assertColumnNames(limited, expected.column_names)
    self.assertColumnTypes(limited, expected_types)
    self.assertRows(limited, expected.rows)
def test_from_csv_no_header(self):
    # csvkit is installed as agate.table's csv module; a headerless file
    # loaded with no column specification is expected to yield columns
    # 'A', 'B', 'C' typed Number, Number, Text.
    import csvkit
    from agate import table as table_module

    table_module.csv = csvkit

    loaded = Table.from_csv('examples/test_no_header.csv', None, header=False)

    self.assertEqual(len(loaded.columns), 3)
    self.assertSequenceEqual(loaded.column_names, ('A', 'B', 'C'))

    for index, expected_type in enumerate((Number, Number, Text)):
        self.assertIsInstance(loaded.columns[index].data_type, expected_type)
def test_from_csv(self):
    # A table loaded from the example CSV must match one built directly
    # from the fixture rows, names and types.
    expected = Table(self.rows, self.column_names, self.column_types)
    loaded = Table.from_csv('examples/test.csv', self.column_names, self.column_types)

    self.assertSequenceEqual(expected.column_names, loaded.column_names)
    self.assertSequenceEqual(expected.column_types, loaded.column_types)

    self.assertEqual(len(expected.columns), len(loaded.columns))
    self.assertEqual(len(expected.rows), len(loaded.rows))

    # Only the first three rows are compared value by value.
    for index in range(3):
        self.assertSequenceEqual(expected.rows[index], loaded.rows[index])
def test_from_csv_type_tester(self):
    # csvkit is installed as agate.table's csv module and a default
    # TypeTester is passed positionally; expect Number, Number, Text.
    import csvkit
    from agate import table as table_module

    table_module.csv = csvkit

    loaded = Table.from_csv('examples/test.csv', TypeTester())

    self.assertEqual(len(loaded.columns), 3)

    for index, expected_type in enumerate((Number, Number, Text)):
        self.assertIsInstance(loaded.columns[index].data_type, expected_type)
def test_from_csv(self):
    # Build the reference table in memory, load the same data from CSV
    # with the same column specification, and compare the two.
    in_memory = Table(self.rows, self.columns)
    from_file = Table.from_csv('examples/test.csv', self.columns)

    for attribute in ('column_names', 'column_types'):
        self.assertSequenceEqual(getattr(in_memory, attribute), getattr(from_file, attribute))

    for attribute in ('columns', 'rows'):
        self.assertEqual(len(getattr(in_memory, attribute)), len(getattr(from_file, attribute)))

    for row_number in (0, 1, 2):
        self.assertSequenceEqual(in_memory.rows[row_number], from_file.rows[row_number])
def test_from_csv_skip_lines_cr(self):
    # Load 'examples/test_cr.csv' with header=False and skip_lines=2 and
    # compare it with a table built from every fixture row except the
    # first. Warnings are silenced while both tables are constructed.
    #
    # catch_warnings() restores the previous filter state on exit;
    # warnings.resetwarnings() would instead discard every filter
    # configured elsewhere in the process.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        table1 = Table(self.rows[1:], column_types=self.column_types)
        table2 = Table.from_csv('examples/test_cr.csv', header=False, skip_lines=2)

    self.assertColumnNames(table2, table1.column_names)
    self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])
    self.assertRows(table2, table1.rows)
def test_from_csv_file_like_object(self):
    # A table loaded from an open file handle must match the in-memory
    # reference table. The handle is opened in binary mode on Python 2
    # and as UTF-8 text otherwise.
    table1 = Table(self.rows, self.column_names, self.column_types)

    if six.PY2:
        f = open('examples/test.csv', 'rb')
    else:
        f = io.open('examples/test.csv', encoding='utf-8')

    # The with block closes the handle even when from_csv raises.
    with f:
        table2 = Table.from_csv(f)

    self.assertColumnNames(table2, table1.column_names)
    self.assertColumnTypes(table2, [Number, Text, Boolean, Date, DateTime, TimeDelta])
    self.assertRows(table2, table1.rows)
def join_csv_files(filelist, args):
    """Load every CSV in *filelist* and join them on ``args.join``.

    Each file is read with ``args.delimiter``, reduced to the join column
    plus ``args.fields``, and every column other than the join column is
    renamed to ``<column>.<base>`` where *base* is the file's key in
    *filelist*. When ``args.add_comm`` is truthy, the table returned by
    ``comm_fields_table`` for the file keyed by ``args.basenames[0]`` is
    placed first. The tables are then joined left to right.
    """
    wanted = [args.join] + args.fields
    tables = []

    if args.add_comm:
        first_path = filelist[args.basenames[0]]
        tables.append(comm_fields_table(first_path, args))

    for base, path in filelist.items():
        selected = Table.from_csv(path, delimiter=args.delimiter).select(wanted)
        new_names = [
            name if name == args.join else f'{name}.{base}'
            for name in selected.column_names
        ]
        tables.append(selected.rename(new_names))

    def _join_pair(left, right):
        # Fold step: join the accumulated table with the next one.
        return left.join(right, args.join)

    return reduce(_join_pair, tables)
def test_from_csv_csvkit(self):
    # With csvkit installed as agate.table's csv module, a table loaded
    # from the example CSV must match the in-memory reference table.
    import csvkit
    from agate import table as table_module

    table_module.csv = csvkit

    expected = Table(self.rows, self.columns)
    loaded = Table.from_csv('examples/test.csv', self.columns)

    self.assertSequenceEqual(expected.get_column_names(), loaded.get_column_names())
    self.assertSequenceEqual(expected.get_column_types(), loaded.get_column_types())

    self.assertEqual(len(expected.columns), len(loaded.columns))
    self.assertEqual(len(expected.rows), len(loaded.rows))

    for index in range(3):
        self.assertSequenceEqual(expected.rows[index], loaded.rows[index])
def _get_data(self):
    """Load the dataset described by ``self.settings["data_source"]``.

    When the data source has a "csv" entry, the file of that name under
    ``self.folder_path`` is read into a Table, using the "key" entry as
    row names and a Swedish-locale TypeTester for column types.

    Raises:
        ValueError: if the data source has no "csv" entry.
    """
    source = self.settings["data_source"]

    if "csv" not in source:
        raise ValueError("Could not find any dataset")

    file_path = "%s/%s" % (self.folder_path, source["csv"])

    # The csv delimiter can be set as a config option; comma otherwise.
    delimiter = source["delimiter"] if "delimiter" in source else ","

    # Auto-detect column types
    tester = TypeTester(locale='sv_SE')

    return Table.from_csv(
        file_path,
        column_types=tester,
        delimiter=delimiter,
        row_names=source["key"]
    )
def test_from_csv_sniff_limit_0(self):
    # With sniff_limit=0 the test expects the whole header line to come
    # back as one Text column whose name still contains the '|' characters.
    unsniffed = Table.from_csv('examples/test_csv_sniff.csv', sniff_limit=0)

    expected_names = ['number|text|boolean|date|datetime|timedelta']

    self.assertColumnNames(unsniffed, expected_names)
    self.assertColumnTypes(unsniffed, [Text])
def comm_fields_table(filepath, args):
    """Return the join column plus communication columns from *filepath*.

    The CSV is read with ``args.delimiter``, extended with the 'setup' and
    'online' computations produced by ``comm_adder_computation``, and then
    reduced to the join column, "circuit.rounds", "setupComm" and
    "onlineComm".
    """
    cols = [args.join, "circuit.rounds", "setupComm", "onlineComm"]

    loaded = Table.from_csv(filepath, delimiter=args.delimiter)
    computations = [
        comm_adder_computation(phase) for phase in ('setup', 'online')
    ]

    return loaded.compute(computations).select(cols)
def test_from_csv_no_type_tester(self):
    # A TypeTester created with limit=0 is passed as column_types; every
    # column is then expected to be Text.
    loaded = Table.from_csv('examples/test.csv', column_types=TypeTester(limit=0))

    self.assertColumnTypes(loaded, [Text] * 6)
def test_from_csv_no_header(self):
    # Headerless file without explicit names: the test expects column
    # names 'a' through 'f' and the usual six column types.
    loaded = Table.from_csv('examples/test_no_header.csv', header=False)

    expected_names = ['a', 'b', 'c', 'd', 'e', 'f']
    expected_types = [Number, Text, Boolean, Date, DateTime, TimeDelta]

    self.assertColumnNames(loaded, expected_names)
    self.assertColumnTypes(loaded, expected_types)
def test_from_csv_no_header_columns(self):
    # Headerless file with names supplied by the caller: the supplied
    # names must be used and the column types must match the expected list.
    names = self.column_names
    result = Table.from_csv('examples/test_no_header.csv', names, header=False)

    self.assertColumnNames(result, self.column_names)
    self.assertColumnTypes(
        result,
        [Number, Text, Boolean, Date, DateTime, TimeDelta]
    )