def test_list_of_rows_table():
    """Build a Table from an in-memory list of rows and read it back three ways.

    The rows are compared with the input data when iterating the table and
    when converting it with ``asRowSet()``; the ``Name`` column is compared
    after converting it with ``asDataFrame()``.
    """
    data = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]

    # (id, name, columnType) per column, in the same order as the values of each row
    column_specs = [
        ('1', 'Name', 'STRING'),
        ('2', 'Born', 'INTEGER'),
        ('3', 'Hipness', 'DOUBLE'),
        ('4', 'Living', 'BOOLEAN'),
    ]
    cols = [
        Column(id=col_id, name=col_name, columnType=col_type)
        for col_id, col_name, col_type in column_specs
    ]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    select_columns = [SelectColumn.from_column(col) for col in cols]
    table = Table(schema1, data, headers=select_columns)

    # rows seen through the table iterator
    for actual_row, expected_row in zip(table, data):
        assert_equals(actual_row, expected_row)

    # rows seen through the RowSet view
    row_set = table.asRowSet()
    for actual_row, expected_row in zip(row_set.rows, data):
        assert_equals(actual_row['values'], expected_row)

    table.columns = cols

    # names seen through the DataFrame view
    frame = table.asDataFrame()
    expected_names = [row[0] for row in data]
    assert_equals(list(frame['Name']), expected_names)
def test_RowSetTable():
    """Parse a RowSet from JSON, wrap it in a Table and check metadata and rows.

    etag, tableId, header count and row count are checked on the RowSet and
    again on the Table built from it.  When pandas can be imported, the shape
    and the ``name`` column of the DataFrame are checked too; otherwise a
    notice is written to stderr.
    """
    etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    table_id = 'syn2976298'

    headers = [
        {'columnType': column_type, 'id': column_id, 'name': column_name}
        for column_type, column_id, column_name in [
            ('STRING', '353', 'name'),
            ('DOUBLE', '355', 'x'),
            ('DOUBLE', '3020', 'y'),
            ('INTEGER', '891', 'n'),
        ]
    ]
    rows = [
        {'rowId': row_id, 'values': values, 'versionNumber': version}
        for row_id, values, version in [
            (5, ['foo', '1.23', '2.2', '101'], 3),
            (6, ['bar', '1.34', '2.4', '101'], 3),
            (7, ['foo', '1.23', '2.2', '101'], 4),
            (8, ['qux', '1.23', '2.2', '102'], 3),
        ]
    ]
    row_set_json = {'etag': etag, 'headers': headers, 'rows': rows, 'tableId': table_id}

    row_set = RowSet.from_json(row_set_json)
    assert row_set.etag == etag
    assert row_set.tableId == table_id
    assert len(row_set.headers) == 4
    assert len(row_set.rows) == 4

    schema = Schema(id=table_id, name="Bogus Schema", columns=[353, 355, 3020, 891], parent="syn1000001")

    table = Table(schema, row_set)
    assert table.etag == etag
    assert table.tableId == table_id
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    try:
        import pandas  # noqa: F401 -- imported only to find out whether it is available
        frame = table.asDataFrame()
        assert frame.shape == (4, 4)
        assert all(frame['name'] == ['foo', 'bar', 'foo', 'qux'])
    except ImportError:
        sys.stderr.write('Pandas is apparently not installed, skipping part of test_RowSetTable.\n\n')
def test_list_of_rows_table():
    """Build a Table from an in-memory list of rows and read it back.

    Rows are compared with the input when iterating the table and when
    converting it with ``asRowSet()``.  When pandas can be imported, the
    ``Name`` column of ``asDataFrame()`` is compared as well; otherwise a
    notice is written to stderr.
    """
    data = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    ## need columns to do cast_values w/o storing
    select_columns = [SelectColumn.from_column(col) for col in cols]
    table = Table(schema1, data, headers=select_columns)

    ## test iterator
    for actual_row, wanted_row in zip(table, data):
        assert actual_row == wanted_row

    ## test asRowSet
    row_set = table.asRowSet()
    for actual_row, wanted_row in zip(row_set.rows, data):
        assert actual_row['values'] == wanted_row

    table.columns = cols

    ## test asDataFrame
    try:
        import pandas  # noqa: F401 -- imported only to find out whether it is available
        frame = table.asDataFrame()
        assert all(frame['Name'] == [row[0] for row in data])
    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping asDataFrame portion of test_list_of_rows_table.\n\n'
        )
def test_RowSetTable():
    """Parse a RowSet from JSON, wrap it in a Table and check metadata and rows.

    etag, tableId, header count and row count are checked on the RowSet and
    again on the Table built from it; finally the shape and the ``name``
    column of ``asDataFrame()`` are checked.
    """
    etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    table_id = 'syn2976298'

    header_specs = [
        ('STRING', '353', 'name'),
        ('DOUBLE', '355', 'x'),
        ('DOUBLE', '3020', 'y'),
        ('INTEGER', '891', 'n'),
    ]
    row_specs = [
        (5, ['foo', '1.23', '2.2', '101'], 3),
        (6, ['bar', '1.34', '2.4', '101'], 3),
        (7, ['foo', '1.23', '2.2', '101'], 4),
        (8, ['qux', '1.23', '2.2', '102'], 3),
    ]
    row_set_json = {
        'etag': etag,
        'headers': [
            {'columnType': column_type, 'id': column_id, 'name': column_name}
            for column_type, column_id, column_name in header_specs
        ],
        'rows': [
            {'rowId': row_id, 'values': values, 'versionNumber': version}
            for row_id, values, version in row_specs
        ],
        'tableId': table_id,
    }

    row_set = RowSet.from_json(row_set_json)
    assert_equals(row_set.etag, etag)
    assert_equals(row_set.tableId, table_id)
    assert_equals(len(row_set.headers), 4)
    assert_equals(len(row_set.rows), 4)

    schema = Schema(id=table_id, name="Bogus Schema", columns=[353, 355, 3020, 891], parent="syn1000001")

    table = Table(schema, row_set)
    assert_equals(table.etag, etag)
    assert_equals(table.tableId, table_id)
    assert_equals(len(table.headers), 4)
    assert_equals(len(table.asRowSet().rows), 4)

    frame = table.asDataFrame()
    assert_equals(frame.shape, (4, 4))
    assert_equals(list(frame['name']), ['foo', 'bar', 'foo', 'qux'])
def test_list_of_rows_table():
    """Build a Table from an in-memory list of rows and read it back.

    Rows are compared with the input when iterating the table and when
    converting it with ``asRowSet()``.  When pandas can be imported, the
    ``Name`` column of ``asDataFrame()`` is compared as well; otherwise a
    notice is written to stderr.
    """
    data = [["John Coltrane", 1926, 8.65, False],
            ["Miles Davis", 1926, 9.87, False],
            ["Bill Evans", 1929, 7.65, False],
            ["Paul Chambers", 1935, 5.14, False],
            ["Jimmy Cobb", 1929, 5.78, True],
            ["Scott LaFaro", 1936, 4.21, False],
            ["Sonny Rollins", 1930, 8.99, True],
            ["Kenny Burrel", 1931, 4.37, True]]

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]

    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    ## need columns to do cast_values w/o storing
    table = Table(schema1, data, headers=[SelectColumn.from_column(col) for col in cols])

    ## test iterator
    ## the builtin zip is used instead of itertools.izip so the loop runs on
    ## both Python 2 and Python 3; the pairs produced are the same
    for table_row, expected_row in zip(table, data):
        assert table_row == expected_row

    ## test asRowSet
    rowset = table.asRowSet()
    for rowset_row, expected_row in zip(rowset.rows, data):
        assert rowset_row['values'] == expected_row

    table.columns = cols

    ## test asDataFrame
    try:
        import pandas as pd  # noqa: F401 -- imported only to find out whether it is available
        df = table.asDataFrame()
        assert all(df['Name'] == [r[0] for r in data])
    except ImportError:
        sys.stderr.write('Pandas is apparently not installed, skipping asDataFrame portion of test_list_of_rows_table.\n\n')
def test_list_of_rows_table():
    """Round-trip a list of rows through Table iteration, asRowSet() and asDataFrame().

    Each view is compared with the original input rows; for the DataFrame only
    the ``Name`` column is compared.
    """
    data = [
        ["John Coltrane", 1926, 8.65, False],
        ["Miles Davis", 1926, 9.87, False],
        ["Bill Evans", 1929, 7.65, False],
        ["Paul Chambers", 1935, 5.14, False],
        ["Jimmy Cobb", 1929, 5.78, True],
        ["Scott LaFaro", 1936, 4.21, False],
        ["Sonny Rollins", 1930, 8.99, True],
        ["Kenny Burrel", 1931, 4.37, True],
    ]

    # (id, name, columnType) per column, in the same order as the values of each row
    column_specs = (
        ('1', 'Name', 'STRING'),
        ('2', 'Born', 'INTEGER'),
        ('3', 'Hipness', 'DOUBLE'),
        ('4', 'Living', 'BOOLEAN'),
    )
    cols = [
        Column(id=col_id, name=col_name, columnType=col_type)
        for col_id, col_name, col_type in column_specs
    ]
    schema1 = Schema(name='Jazz Guys', columns=cols, id="syn1000002", parent="syn1000001")

    # need columns to do cast_values w/o storing
    headers = [SelectColumn.from_column(col) for col in cols]
    table = Table(schema1, data, headers=headers)

    # table iterator
    for got, want in zip(table, data):
        assert_equals(got, want)

    # RowSet view
    row_set = table.asRowSet()
    for got, want in zip(row_set.rows, data):
        assert_equals(got['values'], want)

    table.columns = cols

    # DataFrame view
    frame = table.asDataFrame()
    wanted_names = [row[0] for row in data]
    assert_equals(list(frame['Name']), wanted_names)
def test_csv_table():
    """Write rows to a temporary CSV file, load it as a Table and read it back.

    The table built from the file name is expected to be a ``CsvFileTable``.
    Rows are compared with the input when iterating the table and when
    converting it with ``asRowSet()``.  When pandas can be imported, several
    columns, the index and the shape of ``asDataFrame()`` are compared as
    well; otherwise a notice is written to stderr.

    The temporary file is removed when the test finishes, whether it passed
    or failed.
    """
    ## Maybe not truly a unit test, but here because it doesn't do
    ## network IO to synapse
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    #TODO: use StringIO.StringIO(data) rather than writing files
    try:
        ## create CSV file
        ## NOTE(review): csv.writer is handed the binary-mode handle that
        ## NamedTemporaryFile opens by default, which the csv module only
        ## accepts on Python 2 -- confirm the target interpreter.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            ## remember the name first so the file is cleaned up even if a write fails
            filename = temp.name
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=os.linesep)
            writer.writerow(['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols])
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        ## need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        ## test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        ## test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        ## test asDataFrame
        try:
            import pandas as pd  # noqa: F401 -- imported only to find out whether it is available

            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s'%tuple(row[0:2]) for row in data])
            assert df.shape == (8,4)

        except ImportError:
            sys.stderr.write('Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n')

    finally:
        ## Clean up on success as well as on failure: the file was created with
        ## delete=False, so nothing else will remove it.  Removal is best
        ## effort; a failure to delete is reported but never hides a test failure.
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
def test_csv_table():
    """Write rows to a temporary CSV file, load it as a Table and read it back.

    The table built from the file name is expected to be a ``CsvFileTable``.
    Rows are compared with the input when iterating the table and when
    converting it with ``asRowSet()``.  When pandas can be imported, several
    columns, the index and the shape of ``asDataFrame()`` are compared as
    well; otherwise a notice is written to stderr.

    The temporary file is removed when the test finishes, whether it passed
    or failed.
    """
    ## Maybe not truly a unit test, but here because it doesn't do
    ## network IO to synapse
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [
        Column(id='1', name='Name', columnType='STRING'),
        Column(id='2', name='Born', columnType='INTEGER'),
        Column(id='3', name='Hipness', columnType='DOUBLE'),
        Column(id='4', name='Living', columnType='BOOLEAN'),
    ]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    #TODO: use StringIO.StringIO(data) rather than writing files
    try:
        ## create CSV file
        ## NamedTemporaryFile is used only to reserve a file name; the rows are
        ## written through a separate text-mode handle opened on that name.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert isinstance(table, CsvFileTable)

        ## need to set column headers to read a CSV file
        table.setColumnHeaders([
            SelectColumn(name="ROW_ID", columnType="STRING"),
            SelectColumn(name="ROW_VERSION", columnType="STRING")
        ] + [SelectColumn.from_column(col) for col in cols])

        ## test iterator
        for table_row, expected_row in zip(table, data):
            assert table_row == expected_row

        ## test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert rowset_row['values'] == expected_row[2:]
            assert rowset_row['rowId'] == expected_row[0]
            assert rowset_row['versionNumber'] == expected_row[1]

        ## test asDataFrame
        try:
            import pandas as pd  # noqa: F401 -- imported only to find out whether it is available

            df = table.asDataFrame()
            assert all(df['Name'] == [row[2] for row in data])
            assert all(df['Born'] == [row[3] for row in data])
            assert all(df['Living'] == [row[5] for row in data])
            assert all(df.index == ['%s_%s' % tuple(row[0:2]) for row in data])
            assert df.shape == (8, 4)

        except ImportError:
            sys.stderr.write(
                'Pandas is apparently not installed, skipping asDataFrame portion of test_csv_table.\n\n'
            )

    finally:
        ## Clean up on success as well as on failure: the file was created with
        ## delete=False, so nothing else will remove it.  Removal is best
        ## effort; a failure to delete is reported but never hides a test failure.
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
def test_RowSetTable():
    """Parse a RowSet from JSON, wrap it in a Table and check metadata and rows.

    etag, tableId, header count and row count are checked on the RowSet and
    again on the Table built from it.  When pandas can be imported, the shape
    and the ``name`` column of the DataFrame are checked too; otherwise a
    notice is written to stderr.
    """
    expected_etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    expected_table_id = 'syn2976298'

    header_specs = [
        ('STRING', '353', 'name'),
        ('DOUBLE', '355', 'x'),
        ('DOUBLE', '3020', 'y'),
        ('INTEGER', '891', 'n'),
    ]
    row_specs = [
        (5, ['foo', '1.23', '2.2', '101'], 3),
        (6, ['bar', '1.34', '2.4', '101'], 3),
        (7, ['foo', '1.23', '2.2', '101'], 4),
        (8, ['qux', '1.23', '2.2', '102'], 3),
    ]
    row_set_json = {
        'etag': expected_etag,
        'headers': [
            {'columnType': column_type, 'id': column_id, 'name': column_name}
            for column_type, column_id, column_name in header_specs
        ],
        'rows': [
            {'rowId': row_id, 'values': values, 'versionNumber': version}
            for row_id, values, version in row_specs
        ],
        'tableId': expected_table_id,
    }

    row_set = RowSet.from_json(row_set_json)
    assert row_set.etag == expected_etag
    assert row_set.tableId == expected_table_id
    assert len(row_set.headers) == 4
    assert len(row_set.rows) == 4

    schema = Schema(
        id=expected_table_id,
        name="Bogus Schema",
        columns=[353, 355, 3020, 891],
        parent="syn1000001",
    )

    table = Table(schema, row_set)
    assert table.etag == expected_etag
    assert table.tableId == expected_table_id
    assert len(table.headers) == 4
    assert len(table.asRowSet().rows) == 4

    try:
        import pandas  # noqa: F401 -- imported only to find out whether it is available
        frame = table.asDataFrame()
        assert frame.shape == (4, 4)
        assert all(frame['name'] == ['foo', 'bar', 'foo', 'qux'])
    except ImportError:
        sys.stderr.write(
            'Pandas is apparently not installed, skipping part of test_RowSetTable.\n\n'
        )
def test_csv_table():
    """Write rows to a temporary CSV file, load it as a Table and read it back.

    The table built from the file name is expected to be a ``CsvFileTable``.
    Rows are compared with the input when iterating the table and when
    converting it with ``asRowSet()``; several columns, the index and the
    shape of ``asDataFrame()`` are compared as well.

    The temporary file is removed when the test finishes, whether it passed
    or failed.
    """
    # Maybe not truly a unit test, but here because it doesn't do
    # network IO to synapse
    data = [["1", "1", "John Coltrane", 1926, 8.65, False],
            ["2", "1", "Miles Davis", 1926, 9.87, False],
            ["3", "1", "Bill Evans", 1929, 7.65, False],
            ["4", "1", "Paul Chambers", 1935, 5.14, False],
            ["5", "1", "Jimmy Cobb", 1929, 5.78, True],
            ["6", "1", "Scott LaFaro", 1936, 4.21, False],
            ["7", "1", "Sonny Rollins", 1930, 8.99, True],
            ["8", "1", "Kenny Burrel", 1931, 4.37, True]]

    filename = None

    cols = [Column(id='1', name='Name', columnType='STRING'),
            Column(id='2', name='Born', columnType='INTEGER'),
            Column(id='3', name='Hipness', columnType='DOUBLE'),
            Column(id='4', name='Living', columnType='BOOLEAN')]

    schema1 = Schema(id='syn1234', name='Jazz Guys', columns=cols, parent="syn1000001")

    # TODO: use StringIO.StringIO(data) rather than writing files
    try:
        # create CSV file
        # NamedTemporaryFile is used only to reserve a file name; the rows are
        # written through a separate text-mode handle opened on that name.
        with tempfile.NamedTemporaryFile(delete=False) as temp:
            filename = temp.name

        with io.open(filename, mode='w', encoding="utf-8", newline='') as temp:
            writer = csv.writer(temp, quoting=csv.QUOTE_NONNUMERIC, lineterminator=str(os.linesep))
            headers = ['ROW_ID', 'ROW_VERSION'] + [col.name for col in cols]
            writer.writerow(headers)
            for row in data:
                writer.writerow(row)

        table = Table(schema1, filename)
        assert_is_instance(table, CsvFileTable)

        # need to set column headers to read a CSV file
        table.setColumnHeaders(
            [SelectColumn(name="ROW_ID", columnType="STRING"),
             SelectColumn(name="ROW_VERSION", columnType="STRING")] +
            [SelectColumn.from_column(col) for col in cols])

        # test iterator
        for table_row, expected_row in zip(table, data):
            assert_equals(table_row, expected_row)

        # test asRowSet
        rowset = table.asRowSet()
        for rowset_row, expected_row in zip(rowset.rows, data):
            assert_equals(rowset_row['values'], expected_row[2:])
            assert_equals(rowset_row['rowId'], expected_row[0])
            assert_equals(rowset_row['versionNumber'], expected_row[1])

        # test asDataFrame
        df = table.asDataFrame()
        assert_equals(list(df['Name']), [row[2] for row in data])
        assert_equals(list(df['Born']), [row[3] for row in data])
        assert_equals(list(df['Living']), [row[5] for row in data])
        assert_equals(list(df.index), ['%s_%s' % tuple(row[0:2]) for row in data])
        assert_equals(df.shape, (8, 4))

    finally:
        # Clean up on success as well as on failure: the file was created with
        # delete=False, so nothing else will remove it.  Removal is best
        # effort; a failure to delete is reported but never hides a test failure.
        if filename:
            try:
                if os.path.isdir(filename):
                    shutil.rmtree(filename)
                else:
                    os.remove(filename)
            except Exception as ex:
                print(ex)
def test_RowSetTable():
    """Parse a RowSet from JSON, wrap it in a Table and check metadata and rows.

    etag, tableId, header count and row count are checked on the RowSet and
    again on the Table built from it; finally the shape and the ``name``
    column of ``asDataFrame()`` are checked.
    """
    expected_etag = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee'
    expected_table_id = 'syn2976298'

    headers = [
        {'columnType': column_type, 'id': column_id, 'name': column_name}
        for column_type, column_id, column_name in (
            ('STRING', '353', 'name'),
            ('DOUBLE', '355', 'x'),
            ('DOUBLE', '3020', 'y'),
            ('INTEGER', '891', 'n'),
        )
    ]
    rows = [
        {'rowId': row_id, 'values': values, 'versionNumber': version}
        for row_id, values, version in (
            (5, ['foo', '1.23', '2.2', '101'], 3),
            (6, ['bar', '1.34', '2.4', '101'], 3),
            (7, ['foo', '1.23', '2.2', '101'], 4),
            (8, ['qux', '1.23', '2.2', '102'], 3),
        )
    ]
    row_set_json = {
        'etag': expected_etag,
        'headers': headers,
        'rows': rows,
        'tableId': expected_table_id,
    }

    row_set = RowSet.from_json(row_set_json)
    assert_equals(row_set.etag, expected_etag)
    assert_equals(row_set.tableId, expected_table_id)
    assert_equals(len(row_set.headers), 4)
    assert_equals(len(row_set.rows), 4)

    schema = Schema(
        id=expected_table_id,
        name="Bogus Schema",
        columns=[353, 355, 3020, 891],
        parent="syn1000001",
    )

    table = Table(schema, row_set)
    assert_equals(table.etag, expected_etag)
    assert_equals(table.tableId, expected_table_id)
    assert_equals(len(table.headers), 4)
    assert_equals(len(table.asRowSet().rows), 4)

    frame = table.asDataFrame()
    assert_equals(frame.shape, (4, 4))
    assert_equals(list(frame['name']), ['foo', 'bar', 'foo', 'qux'])