def test_create_table_no_columns_parameter(self):
    # Create a table from a list of types only (no ``columns`` argument)
    # and check that the set of declared column types matches the request.
    wanted_types = ["INTEGER", "TEXT", "REAL", "BLOB"]
    connection = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(connection)

    with connection:
        cur = connection.cursor()
        importer.create_table("A", types=wanted_types)
        table_info = cur.execute("PRAGMA table_info(A)").fetchall()

        # Index 2 of each table_info row is the declared type; only set
        # membership is compared here, not column order.
        declared_types = set(row[2] for row in table_info)
        self.assertEqual(declared_types, set(wanted_types))
def test_create_table_duplication_prevention(self):
    # Repeated column names are expected to come back from the database
    # with "_2", "_3", ... suffixes on the second and later occurrences.
    requested = ["Red", "Green", "Red", "Blue", "Green", "Green"]
    expected = ["Red", "Green", "Red_2", "Blue", "Green_2", "Green_3"]
    connection = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(connection)

    with connection:
        cur = connection.cursor()
        # The names double as the types argument: its contents are
        # irrelevant to this test, it only has to be as long as the
        # columns argument.
        importer.create_table("A", columns=requested, types=requested)

        # Index 1 of each table_info row is the column identifier.
        table_info = cur.execute("PRAGMA table_info(A)").fetchall()
        actual = [row[1] for row in table_info]
        self.assertEqual(actual, expected)
def test_loadfile_comma_separated_values(self):
    # Import the comma-separated students sample into table A and compare
    # the stored rows with the values expected from the file.
    raw_rows = [
        ("Jan", 2014, "A1", 18),
        ("Lucy", 2016, "B5", 16),
        ("Richard", 2010, "--", 22),
    ]
    # Text columns are expected to come back as unicode strings.
    expected = [
        (unicode(text_a), num_a, unicode(text_b), num_b)
        for text_a, num_a, text_b, num_b in raw_rows
    ]

    connection = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(connection)
    sample_path = resource_path("samples", "students.csv")
    importer.loadfile(sample_path, "A")

    stored = connection.cursor().execute("SELECT * FROM A").fetchall()
    self.assertEqual(stored, expected)
def test_create_table(self):
    # Create a table with explicit column names and types, then confirm
    # that PRAGMA table_info reports both exactly as requested, in order.
    names = ["Red", "Green", "Blue", "Black"]
    kinds = ["INTEGER", "TEXT", "REAL", "BLOB"]
    connection = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(connection)

    with connection:
        cur = connection.cursor()
        importer.create_table("A", columns=names, types=kinds)
        table_info = cur.execute("PRAGMA table_info(A)").fetchall()

        # Declared types live at index 2 of each table_info row.
        self.assertEqual([row[2] for row in table_info], kinds)

        # Column identifiers live at index 1.
        self.assertEqual([row[1] for row in table_info], names)
def test_loadfile_invalid_unicode(self):
    # Import the "invalid-unicode.csv" sample and check that every stored
    # row, re-joined with commas, equals the corresponding raw line of the
    # file byte for byte.
    dbc = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(dbc)
    test_file = resource_path("samples", "invalid-unicode.csv")
    importer.loadfile(test_file, "A")

    # Fetch text values as raw bytes so they can be compared with the file
    # contents without a decoding step.
    original_text_factory = dbc.text_factory
    dbc.text_factory = bytes
    try:
        cursor = dbc.cursor()
        with open(test_file, "rb") as iostream:
            rows = cursor.execute("SELECT * FROM A")
            # NOTE(review): zip() stops at the shorter input, so extra
            # file lines or extra rows are not detected by this loop.
            for line, row in zip(iostream, rows):
                stripped_line = line.strip()
                flat_row = ",".encode("ascii").join(row)
                self.assertEqual(flat_row, stripped_line)
    finally:
        # The saved factory was previously never used; restore it so the
        # connection is left the way it was found.
        dbc.text_factory = original_text_factory
def test_ignore_errors_False(self):
    # With ignore_errors=False, loading a file that contains a malformed
    # row is expected to raise sqlite3.Error.
    dbc = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(dbc, ignore_errors=False,
                                        log_warnings=False)

    # This will cause the import to fail because the last line is short one
    # column.
    file_lines = ["A,B,C\n"] + ["1,2,3\n"] * 20 + ["7,8\n"]
    contents = "".join(file_lines).encode("ascii")

    # Create the file before entering the try block: if creation itself
    # fails there is nothing to clean up, and the finally clause must not
    # reference an unbound name.
    tmpio = tempfile.NamedTemporaryFile(delete=False)
    filename = tmpio.name
    try:
        # The with-statement closes the handle even when write() raises,
        # so the file is never still open when it is unlinked.
        with tmpio:
            tmpio.write(contents)

        self.assertRaises(sqlite3.Error, importer.loadfile, filename, "A")
    finally:
        os.unlink(filename)
def test_loadfile_tab_separated_values(self):
    # Import the tab-separated grades sample into table A and compare the
    # stored rows with the values expected from the file.
    raw_rows = [
        (1, 90, "Richard"),
        (2, 100, "Richard"),
        (3, 70, "Richard"),
        (1, 85, "Lucy"),
        (2, 99, "Lucy"),
        (3, 80, "Lucy"),
        (1, 55, "Jan"),
        (2, 70, "Jan"),
        (3, 40, "Jan"),
    ]
    # The trailing text column is expected to come back as unicode.
    expected = [
        (num_a, num_b, unicode(text))
        for num_a, num_b, text in raw_rows
    ]

    connection = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(connection)
    sample_path = resource_path("samples", "grades.tsv")
    importer.loadfile(sample_path, "A")

    stored = connection.cursor().execute("SELECT * FROM A").fetchall()
    self.assertEqual(stored, expected)
def test_loadfile_header_detection(self):
    # Load one sample without a header row and one with a header row, and
    # check the column names each resulting table ends up with: the
    # headerless file is expected to get a1..a3, the other its own header
    # names.
    connection = sqlite3.connect(":memory:")
    importer = swadr.SQLite3CSVImporter(connection)
    cur = connection.cursor()

    # (sample file, target table, table_info query, expected column names)
    cases = (
        ("grades-no-header.tsv", "A", "PRAGMA table_info(A)",
         ["a1", "a2", "a3"]),
        ("grades.tsv", "B", "PRAGMA table_info(B)",
         ["Assignment", "Grade", "Student"]),
    )

    for sample_name, table, pragma, names in cases:
        sample_path = resource_path("samples", sample_name)
        importer.loadfile(sample_path, table)

        expected_names = [unicode(name) for name in names]
        table_info = cur.execute(pragma).fetchall()
        # Index 1 of each table_info row is the column name.
        actual_names = [row[1] for row in table_info]
        self.assertEqual(actual_names, expected_names)