def test_iter_with_mismatch_row_metadata(self):
    """Iterating must fail when the CSV and the headers disagree on row metadata.

    The CSV header line carries ROW_ID, ROW_VERSION and ROW_ETAG, while the
    headers passed to ``CsvFileTable`` declare only ROW_ID and ROW_VERSION
    (plus whatever ``as_table_columns`` derives from the data).  Fetching the
    first row is expected to raise ``ValueError``.
    """
    data = (
        "ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
        "1,2,etag1,\"I like trains\"\n"
        "5,1,etag2,\"weeeeeeeeeeee\"\n"
    )
    cols = as_table_columns(StringIOContextManager(data))
    headers = [
        SelectColumn(name="ROW_ID", columnType="STRING"),
        SelectColumn(name="ROW_VERSION", columnType="STRING"),
    ] + [SelectColumn.from_column(col) for col in cols]

    # io.open is patched to hand back the in-memory CSV; the file path is fake.
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # Named row_iterator (not ``iter``) so the builtin is not shadowed.
        row_iterator = iter(table)
        # The error is expected on the first fetch, not on iterator creation.
        with pytest.raises(ValueError):
            next(row_iterator)
def test_iter_row_metadata_mismatch_in_headers(self):
    """Iterating must fail when only the headers declare row metadata.

    The CSV contains plain data columns (``col1``, ``col2``) and no row
    metadata, while the headers passed to ``CsvFileTable`` declare ROW_ID and
    ROW_VERSION.  Fetching the first row is expected to raise ``ValueError``.
    """
    data = (
        "col1,col2\n"
        "1,2\n"
        "2,1\n"
    )
    cols = as_table_columns(StringIOContextManager(data))
    headers = [
        SelectColumn(name="ROW_ID", columnType="STRING"),
        SelectColumn(name="ROW_VERSION", columnType="STRING"),
    ] + [SelectColumn.from_column(col) for col in cols]

    # io.open is patched to hand back the in-memory CSV; the file path is fake.
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # Named row_iterator (not ``iter``) so the builtin is not shadowed.
        row_iterator = iter(table)
        # The error is expected on the first fetch, not on iterator creation.
        with pytest.raises(ValueError):
            next(row_iterator)
def test_iter_with_file_view_row_metadata(self):
    """Iterating yields every row when the CSV and headers agree on row metadata.

    Both the CSV header line and the headers passed to ``CsvFileTable``
    contain ROW_ID, ROW_VERSION and ROW_ETAG, so iteration is expected to
    return each data row including its metadata values.
    """
    data = (
        "ROW_ID,ROW_VERSION,ROW_ETAG,col\n"
        "1,2,etag1,\"I like trains\"\n"
        "5,1,etag2,\"weeeeeeeeeeee\"\n"
    )
    cols = as_table_columns(StringIOContextManager(data))
    headers = [
        SelectColumn(name="ROW_ID", columnType="STRING"),
        SelectColumn(name="ROW_VERSION", columnType="STRING"),
        SelectColumn(name="ROW_ETAG", columnType="STRING"),
    ] + [SelectColumn.from_column(col) for col in cols]
    expected_rows = [
        ['1', '2', "etag1", "I like trains"],
        ['5', '1', "etag2", "weeeeeeeeeeee"],
    ]

    # io.open is patched to hand back the in-memory CSV; the file path is fake.
    with patch.object(io, "open", return_value=StringIOContextManager(data)):
        table = CsvFileTable("syn123", "/fake/file/path", headers=headers)
        # Compare the complete row list: zipping against the table would stop
        # at the shorter side and pass even if the table yielded no rows.
        assert expected_rows == list(table)
def test_SelectColumn_forward_compatibility():
    """Every keyword given to ``SelectColumn`` must be readable back via ``get``.

    ``columnSQL`` is included alongside ``id``, ``columnType`` and ``name``;
    presumably it stands in for a field newer than the client knows about
    (NOTE(review): confirm against the SelectColumn definition).
    """
    sc = SelectColumn(id=1, columnType="STRING", name="my_col", columnSQL="new")

    # Plain asserts, matching the pytest style used by the other tests here.
    assert 1 == sc.get("id")
    assert "STRING" == sc.get("columnType")
    assert "my_col" == sc.get("name")
    assert "new" == sc.get("columnSQL")
def test_csv_table():
    """Write a small table to a CSV file and read it back through ``Table``.

    Maybe not truly a unit test, but it lives here because it does no
    network IO to Synapse.  The test checks three views of the same file:
    row iteration, ``asRowSet`` and (when pandas is importable)
    ``asDataFrame``.

    The temporary CSV file is always removed afterwards, whether the test
    passes or fails.
    """
    data = [
        ["1", "1", "John Coltrane", 1926, 8.65, False],
        ["2", "1", "Miles Davis", 1926, 9.87, False],
        ["3", "1", "Bill Evans", 1929, 7.65, False],
        ["4", "1", "Paul Chambers", 1935, 5.14, False],
        ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
        ["6", "1", "Scott LaFaro", 1936, 4.21, False],
        ["7", "1", "Sonny Rollins", 1930, 8.99, True],
        ["8", "1", "Kenny Burrel", 1931, 4.37, True],
    ]

    # Stays None until the temp file exists, so cleanup knows whether to act.
    filename = None

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # Reserve a path; delete=False keeps the file once this handle closes.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        # Create the CSV file: a header line followed by the data rows.
        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            writer.writerows(data)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        # Need to set column headers to read a CSV file.
        table.setColumnHeaders(
            [
                SelectColumn(name="ROW_ID", columnType="STRING"),
                SelectColumn(name="ROW_VERSION", columnType="STRING"),
            ]
            + [SelectColumn.from_column(col) for col in cols]
        )

        # Test the iterator.  The full list is compared because zip() would
        # stop at the shorter side and hide missing rows.
        assert list(table) == data

        # Test asRowSet: the first two columns are the row id and version,
        # the remainder are the row values.
        rowset = table.asRowSet()
        rowset_rows = list(rowset.rows)
        assert len(rowset_rows) == len(data)
        for rowset_row, expected_row in zip(rowset_rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        # Test asDataFrame; skipped with a notice if an ImportError surfaces.
        try:
            import pandas  # noqa: F401 -- imported only to probe availability

            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s' % tuple(row[0:2]) for row in data])
            assert df.shape == (8, 4)
        except ImportError:
            sys.stderr.write(
                'Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n'
            )
    finally:
        # Always clean up: previously the file was removed only when the test
        # failed, so every successful run leaked a temp file.
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except OSError as cleanup_error:
                # Best effort: report, but never mask the test's own outcome.
                print(cleanup_error)
def test_SelectColumn_forward_compatibility():
    """Check that each keyword passed to ``SelectColumn`` is returned by ``get``.

    The field set includes ``columnSQL`` next to ``id``, ``columnType`` and
    ``name``; all four must round-trip unchanged.
    """
    expected_fields = {
        "id": 1,
        "columnType": "STRING",
        "name": "my_col",
        "columnSQL": "new",
    }
    sc = SelectColumn(**expected_fields)

    # Checked in the same order the fields were supplied.
    for field_name, expected_value in expected_fields.items():
        assert expected_value == sc.get(field_name)