def test_data_source_join_overlapping_fields(self):
    # Both tables carry a column named 'a'.  The joined context is expected
    # to list every column qualified by its table name, so the two 'a'
    # columns stay distinguishable.
    tables = {
        'my_table': TypedDataFrame(
            pd.DataFrame([[1, 9]], columns=['a', 'b']),
            types=[BQScalarType.INTEGER] * 2),
        'my_table2': TypedDataFrame(
            pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'd']),
            types=[BQScalarType.INTEGER] * 2),
    }
    table_context = DatasetTableContext(
        {'my_project': {'my_dataset': tables}})

    left = (
        TableReference(('my_project', 'my_dataset', 'my_table')),
        EMPTY_NODE,
    )
    right = (
        TableReference(('my_project', 'my_dataset', 'my_table2')),
        EMPTY_NODE,
    )
    # INNER join with no explicit join condition supplied.
    data_source = DataSource(left, [('INNER', right, EMPTY_NODE)])

    context = data_source.create_context(table_context)

    expected_columns = [
        'my_table.a', 'my_table.b', 'my_table2.a', 'my_table2.d',
    ]
    self.assertEqual(context.table.to_list_of_lists(), [[1, 9, 1, 2]])
    self.assertEqual(list(context.table.dataframe), expected_columns)
def test_data_source(self):
    # A DataSource with no joins, built over the shared fixture's
    # 'my_table': checks the rows, the table-qualified column name and the
    # column type of the resulting context.
    table_ref = TableReference(('my_project', 'my_dataset', 'my_table'))
    data_source = DataSource((table_ref, EMPTY_NODE), [])

    context = data_source.create_context(self.table_context)

    self.assertEqual(context.table.to_list_of_lists(), [[1], [2]])
    self.assertEqual(list(context.table.dataframe), ['my_table.a'])
    self.assertEqual(context.table.types, [BQScalarType.INTEGER])
def test_select(self):
    # type: () -> None
    # SELECT * over the shared fixture's 'my_table': the result carries no
    # table name and its column is reported unqualified ('a').
    #
    # NOTE(review): test_data_source expects this same table, read from
    # self.table_context, to hold [[1], [2]], while this test expects three
    # rows.  Confirm which expectation matches the fixture.
    table_ref = TableReference(('my_project', 'my_dataset', 'my_table'))
    source = DataSource((table_ref, EMPTY_NODE), [])
    star = StarSelector(EMPTY_NODE, EMPTY_NODE, EMPTY_NODE)
    query = Select(
        EMPTY_NODE, [star], source, EMPTY_NODE, EMPTY_NODE, EMPTY_NODE)

    result, result_name = query.get_dataframe(self.table_context)

    self.assertEqual(result_name, None)
    self.assertEqual(result.to_list_of_lists(), [[1], [2], [3]])
    self.assertEqual(list(result.dataframe), ['a'])
    self.assertEqual(result.types, [BQScalarType.INTEGER])
def test_query_expression_limit(self):
    # type: () -> None
    # Wraps SELECT * in a QueryExpression carrying a (limit, offset) pair of
    # (1, 1); only the row holding 2 is expected back.
    table_ref = TableReference(('my_project', 'my_dataset', 'my_table'))
    source = DataSource((table_ref, EMPTY_NODE), [])
    star = StarSelector(EMPTY_NODE, EMPTY_NODE, EMPTY_NODE)
    select = Select(
        EMPTY_NODE, [star], source, EMPTY_NODE, EMPTY_NODE, EMPTY_NODE)

    limit = Value(1, BQScalarType.INTEGER)
    offset = Value(1, BQScalarType.INTEGER)
    query = QueryExpression(EMPTY_NODE, select, EMPTY_NODE, (limit, offset))

    # The second element of the returned pair is not checked here.
    result, _ = query.get_dataframe(self.table_context)

    self.assertEqual(result.to_list_of_lists(), [[2]])
def test_data_source_join_multiple_joins(self):
    # Chains two FULL joins: my_table with my_table2 on 'a', then my_table3
    # on 'b'.  Only my_table2 and my_table3 have a column named 'b'
    # (my_table has 'b1').
    tables = {
        'my_table': TypedDataFrame(
            pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                         columns=['a', 'b1', 'c1']),
            types=[BQScalarType.INTEGER] * 3),
        'my_table2': TypedDataFrame(
            pd.DataFrame([[1, 8, 9], [0, 7, 2]],
                         columns=['a', 'b', 'c2']),
            types=[BQScalarType.INTEGER] * 3),
        'my_table3': TypedDataFrame(
            pd.DataFrame([[3, 4, 5], [6, 7, 8]],
                         columns=['a3', 'b', 'c3']),
            types=[BQScalarType.INTEGER] * 3),
    }
    table_context = DatasetTableContext(
        {'my_project': {'my_dataset': tables}})

    first = (
        TableReference(('my_project', 'my_dataset', 'my_table')),
        EMPTY_NODE,
    )
    second = (
        TableReference(('my_project', 'my_dataset', 'my_table2')),
        EMPTY_NODE,
    )
    third = (
        TableReference(('my_project', 'my_dataset', 'my_table3')),
        EMPTY_NODE,
    )
    data_source = DataSource(
        first,
        [('FULL', second, ('a', )), ('FULL', third, ('b', ))])

    context = data_source.create_context(table_context)

    # Columns run my_table | my_table2 | my_table3; None marks the side of a
    # row that found no partner in the corresponding join.
    expected_rows = [
        [1, 2, 3, 1, 8, 9, None, None, None],
        [4, 5, 6, None, None, None, None, None, None],
        [None, None, None, 0, 7, 2, 6, 7, 8],
        [None, None, None, None, None, None, 3, 4, 5],
    ]
    self.assertEqual(context.table.to_list_of_lists(), expected_rows)
def test_data_source_join_error(
        self,
        join_type,  # type: str
        join_on,  # type: ConditionsType
        error_type,  # type: Type[BaseException]
        error  # type: str
):
    # type: (...) -> None
    # Joins my_table with my_table2 using the supplied join type and
    # condition, and requires that building the context raises error_type
    # with a message matching the regular expression `error`.
    #
    # NOTE(review): assertRaisesRegexp is the Python 2 spelling; Python 3
    # names it assertRaisesRegex.  Kept as-is on the assumption that this
    # file still targets Python 2 -- confirm before switching.
    base_table = (
        TableReference(('my_project', 'my_dataset', 'my_table')),
        EMPTY_NODE,
    )
    other_table = (
        TableReference(('my_project', 'my_dataset', 'my_table2')),
        EMPTY_NODE,
    )
    data_source = DataSource(
        base_table, [Join(join_type, other_table, join_on)])

    with self.assertRaisesRegexp(error_type, error):
        data_source.create_context(self.table_context)
def test_select_where(self, where):
    # type: (EvaluatableNode) -> None
    # Selects column 'a' from a two-row table, filtered by the supplied
    # `where` node.  Every condition passed in is expected to keep only the
    # second row (a=3, b=4).
    two_column_table = TypedDataFrame(
        pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']),
        types=[BQScalarType.INTEGER] * 2)
    where_table_context = DatasetTableContext(
        {'my_project': {'my_dataset': {'my_table': two_column_table}}})

    selectors = [Selector(Field(('a', )), EMPTY_NODE)]
    table_ref = TableReference(('my_project', 'my_dataset', 'my_table'))
    source = DataSource((table_ref, EMPTY_NODE), [])
    query = Select(
        EMPTY_NODE, selectors, source, where, EMPTY_NODE, EMPTY_NODE)

    # The second element of the returned pair is not checked here.
    result, _ = query.get_dataframe(where_table_context)

    self.assertEqual(result.to_list_of_lists(), [[3]])
def test_data_source_join_multiple_columns(self):
    # FULL join of my_table with my_table2 on the column pair ('a', 'b').
    # Only the (1, 2) rows agree on both columns, so the other row of each
    # table is expected to appear padded with None.
    tables = {
        'my_table': TypedDataFrame(
            pd.DataFrame([[1, 2, 3], [1, 5, 6]], columns=['a', 'b', 'c']),
            types=[BQScalarType.INTEGER] * 3),
        'my_table2': TypedDataFrame(
            pd.DataFrame([[1, 2, 7], [3, 2, 8]], columns=['a', 'b', 'd']),
            types=[BQScalarType.INTEGER] * 3),
    }
    table_context = DatasetTableContext(
        {'my_project': {'my_dataset': tables}})

    left = (
        TableReference(('my_project', 'my_dataset', 'my_table')),
        EMPTY_NODE,
    )
    right = (
        TableReference(('my_project', 'my_dataset', 'my_table2')),
        EMPTY_NODE,
    )
    data_source = DataSource(left, [('FULL', right, ('a', 'b'))])

    context = data_source.create_context(table_context)

    expected_rows = [
        [1, 2, 3, 1, 2, 7],
        [1, 5, 6, None, None, None],
        [None, None, None, 3, 2, 8],
    ]
    self.assertEqual(context.table.to_list_of_lists(), expected_rows)