def test_complex_type_errors(self, query, error):
     # type: (str, str) -> None
     '''Checks that evaluating `query` raises a ValueError matching `error`.

     Args:
         query: SQL text; select_rule must consume all of its tokens.
         error: Regular expression searched for in the ValueError message.
     '''
     node, leftover = select_rule(tokenize(query))
     self.assertFalse(leftover)
     assert isinstance(node, Select)
     # six.assertRaisesRegex dispatches to whichever method name the running
     # Python provides; the assertRaisesRegexp alias is deprecated on Python 3
     # and was removed in Python 3.12.
     with six.assertRaisesRegex(self, ValueError, error):
         node.get_dataframe(self.small_table_context)
示例#2
0
    def test_select_star(self, select, expected_result):
        # type: (str, List[List[int]]) -> None
        '''Evaluates `select` against two INTEGER tables and compares the rows.

        The context holds p.d.table1 (columns a, b, c) and p.d.table2
        (columns a, d, e), each with three rows.

        Args:
            select: SQL text of the select statement to parse and evaluate.
            expected_result: Rows the resulting dataframe must contain.
        '''
        table1 = TypedDataFrame(
            pd.DataFrame([[2, 8, 4], [6, 3, 0], [12, 10, 1]], columns=['a', 'b', 'c']),
            types=[BQScalarType.INTEGER] * 3)
        table2 = TypedDataFrame(
            pd.DataFrame([[2, 7, 3], [6, 2, -1], [12, 9, 0]], columns=['a', 'd', 'e']),
            types=[BQScalarType.INTEGER] * 3)
        group_table_context = DatasetTableContext(
            {'p': {'d': {'table1': table1, 'table2': table2}}})

        parsed, remaining_tokens = select_rule(tokenize(select))
        assert isinstance(parsed, Select)
        typed_result, _ = parsed.get_dataframe(group_table_context)
        self.assertFalse(remaining_tokens)
        actual_rows = typed_result.to_list_of_lists()
        self.assertEqual(actual_rows, expected_result)
示例#3
0
 def test_struct_field_and_constant(self, query, expected_result, expected_types):
     # type: (str, List[List[Tuple[Optional[int], ...]]], Sequence[BQStructType]) -> None
     '''Evaluates `query` on self.small_table_context; checks rows and types.

     Args:
         query: SQL text; select_rule must consume all of its tokens.
         expected_result: Rows the resulting dataframe must contain.
         expected_types: Value the result's `types` attribute must equal.
     '''
     parsed, remaining_tokens = select_rule(tokenize(query))
     self.assertFalse(remaining_tokens)
     assert isinstance(parsed, Select)
     typed_result, _ = parsed.get_dataframe(self.small_table_context)
     actual_rows = typed_result.to_list_of_lists()
     self.assertEqual(actual_rows, expected_result)
     self.assertEqual(typed_result.types, expected_types)
 def test_complex_types(self, query, expected_result, expected_type):
     # type: (str, Tuple[Optional[int], ...], BQType) -> None
     '''Evaluates a table-free `query` and checks its single value and type.

     The query runs against an empty DatasetTableContext, and the result is
     expected to be exactly one row with one column.

     Args:
         query: SQL text; select_rule must consume all of its tokens.
         expected_result: The value of the only cell in the result.
         expected_type: The type of the only column in the result.
     '''
     empty_context = DatasetTableContext({})
     parsed, remaining_tokens = select_rule(tokenize(query))
     self.assertFalse(remaining_tokens)
     assert isinstance(parsed, Select)
     typed_result, _ = parsed.get_dataframe(empty_context)
     single_cell_rows = [[expected_result]]
     self.assertEqual(typed_result.to_list_of_lists(), single_cell_rows)
     self.assertEqual(typed_result.types, [expected_type])
示例#5
0
    def test_non_aggregate_function_in_group_by(self):
        # type: () -> None
        '''Checks a scalar function nested inside an aggregate under GROUP BY.

        Runs `max(concat(b, "hi"))` grouped by b and expects one row per
        distinct value of b.
        '''
        # NOTE(review): column b holds string values ('1', '2') but is declared
        # as BQScalarType.INTEGER -- confirm this mismatch is intentional.
        table_context = DatasetTableContext(
            {'my_project': {'my_dataset': {'my_table': TypedDataFrame(
                pd.DataFrame([['one', '1'], ['two', '1'], ['three', '2'], ['four', '2']],
                             columns=['a', 'b']),
                types=[BQScalarType.STRING, BQScalarType.INTEGER])}}})

        tokens = tokenize('select max(concat(b, "hi")) from my_table group by b')
        node, leftover = select_rule(tokens)
        self.assertFalse(leftover)
        # Narrow the parsed node to Select before using it, as the other tests
        # around this one do.
        assert isinstance(node, Select)
        result, unused_table_name = node.get_dataframe(table_context)
        self.assertEqual(result.to_list_of_lists(), [['1hi'], ['2hi']])
示例#6
0
 def test_analytic_function_with_group_by(self, selectors, expected_result):
     '''Evaluates `selectors` over my_table grouped by b; compares rows unordered.

     Args:
         selectors: Text substituted for the select list of
             'select {} from my_table group by b'.
         expected_result: Rows the result must contain, in any order.
     '''
     table_context = DatasetTableContext(
         {'my_project': {'my_dataset': {'my_table': TypedDataFrame(
             pd.DataFrame([[20, 2], [10, 2], [30, 3], [31, 3], [32, 3]], columns=['a', 'b']),
             types=[BQScalarType.INTEGER, BQScalarType.INTEGER])}}})
     tokens = tokenize('select {} from my_table group by b'.format(selectors))
     node, leftover = select_rule(tokens)
     # Narrow the parsed node to Select before using it, as the other tests
     # around this one do.
     assert isinstance(node, Select)
     result, unused_table_name = node.get_dataframe(table_context)
     self.assertFalse(leftover)
     # Note: BQ docs say if ORDER BY clause (for the select as a whole) is not present, order of
     # results is undefined, so we do not assert on the order.
     six.assertCountEqual(self, result.to_list_of_lists(), expected_result)
示例#7
0
    def test_array_agg_arguments(self, query, expected_result):
        # type: (str, Tuple[Optional[int], ...]) -> None
        '''Evaluates `query` over p.d.t and checks the single resulting cell.

        Table p.d.t has one INTEGER column `a` with rows 1, 1, 2 and None.

        Args:
            query: Select statement without a FROM clause; ' FROM p.d.t' is
                appended before parsing.
            expected_result: The value of the only cell in the result.
        '''
        source_frame = pd.DataFrame([[1], [1], [2], [None]], columns=['a'])
        typed_table = TypedDataFrame(source_frame, types=[BQScalarType.INTEGER])
        table_context = DatasetTableContext({'p': {'d': {'t': typed_table}}})

        full_query = query + ' FROM p.d.t'
        parsed, remaining_tokens = select_rule(tokenize(full_query))
        self.assertFalse(remaining_tokens)
        assert isinstance(parsed, Select)
        typed_result, _ = parsed.get_dataframe(table_context)
        self.assertEqual(typed_result.to_list_of_lists(), [[expected_result]])
示例#8
0
    def test_aggregate_functions_in_group_by(self, selectors, expected_result):
        # type: (str, List[List[int]]) -> None
        '''Evaluates aggregate `selectors` over my_table grouped by column b.

        The table has INTEGER columns a and b; one value of a is NaN.

        Args:
            selectors: Text substituted for the select list of
                'select {} from my_table group by b'.
            expected_result: Rows the resulting dataframe must contain.
        '''
        source_frame = pd.DataFrame([[2, 1], [4, 1], [5, 2], [np.nan, 2]], columns=['a', 'b'])
        typed_table = TypedDataFrame(
            source_frame, types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
        table_context = DatasetTableContext(
            {'my_project': {'my_dataset': {'my_table': typed_table}}})

        sql = 'select {} from my_table group by b'.format(selectors)
        parsed, remaining_tokens = select_rule(tokenize(sql))
        assert isinstance(parsed, Select)
        typed_result, _ = parsed.get_dataframe(table_context)
        self.assertFalse(remaining_tokens)
        self.assertEqual(typed_result.to_list_of_lists(), expected_result)
示例#9
0
 def test_select_distinct(self, select, expected_result):
     # type: (str, List[List[int]]) -> None
     '''Evaluates `select` over a two-row table and compares the rows.

     my_table has INTEGER columns a and b with rows [1, 2] and [1, 3].

     Args:
         select: SQL text of the select statement to parse and evaluate.
         expected_result: Rows the resulting dataframe must contain.
     '''
     source_frame = pd.DataFrame([[1, 2], [1, 3]], columns=['a', 'b'])
     typed_table = TypedDataFrame(
         source_frame, types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
     table_context = DatasetTableContext(
         {'my_project': {'my_dataset': {'my_table': typed_table}}})
     parsed, remaining_tokens = select_rule(tokenize(select))
     assert isinstance(parsed, Select)
     typed_result, _ = parsed.get_dataframe(table_context)
     self.assertFalse(remaining_tokens)
     self.assertEqual(typed_result.to_list_of_lists(), expected_result)
示例#10
0
 def test_count(self, count, expected_result):
     # type: (str, List[List[int]]) -> None
     '''Evaluates 'SELECT <count> FROM my_table' and compares the rows.

     my_table has INTEGER columns a, b and c; column a is None in the
     second of its two rows.

     Args:
         count: Text substituted for the select list of the query.
         expected_result: Rows the resulting dataframe must contain.
     '''
     source_frame = pd.DataFrame([[1, 2, 3], [None, 4, 3]], columns=['a', 'b', 'c'])
     typed_table = TypedDataFrame(
         source_frame,
         types=[BQScalarType.INTEGER, BQScalarType.INTEGER, BQScalarType.INTEGER])
     count_table_context = DatasetTableContext(
         {'my_project': {'my_dataset': {'my_table': typed_table}}})
     sql = 'SELECT {} FROM my_table'.format(count)
     parsed, remaining_tokens = select_rule(tokenize(sql))
     self.assertFalse(remaining_tokens)
     assert isinstance(parsed, Select)
     typed_result, _ = parsed.get_dataframe(count_table_context)
     self.assertEqual(typed_result.to_list_of_lists(), expected_result)
    def test_aggregate_functions_in_expressions(self, query, expected_result):
        # type: (str, List[List[int]]) -> None
        '''Evaluates `query` over a one-column table and compares the rows.

        my_table has a single INTEGER column a with rows 1, 2 and 3.

        Args:
            query: SQL text of the select statement to parse and evaluate.
            expected_result: Rows the resulting dataframe must contain.
        '''
        source_frame = pd.DataFrame([[1], [2], [3]], columns=['a'])
        typed_table = TypedDataFrame(source_frame, types=[BQScalarType.INTEGER])
        table_context = DatasetTableContext(
            {'my_project': {'my_dataset': {'my_table': typed_table}}})

        parsed, remaining_tokens = select_rule(tokenize(query))
        assert isinstance(parsed, Select)
        typed_result, _ = parsed.get_dataframe(table_context)
        self.assertFalse(remaining_tokens)
        self.assertEqual(typed_result.to_list_of_lists(), expected_result)
示例#12
0
 def test_select_group_by_error(self, select):
     # type: (str) -> None
     '''Checks that evaluating `select` raises the group-by ValueError.

     my_table has INTEGER columns a, b and c with rows [1, 2, 3] and
     [1, 3, 3]. Evaluation must raise a ValueError whose message contains
     "not aggregated or grouped by".

     Args:
         select: SQL text; select_rule must consume all of its tokens.
     '''
     group_table_context = DatasetTableContext({
         'my_project': {
             'my_dataset': {
                 'my_table':
                 TypedDataFrame(pd.DataFrame([[1, 2, 3], [1, 3, 3]],
                                             columns=['a', 'b', 'c']),
                                types=[
                                    BQScalarType.INTEGER,
                                    BQScalarType.INTEGER,
                                    BQScalarType.INTEGER
                                ])
             }
         }
     })
     select_node, leftover = select_rule(tokenize(select))
     assert isinstance(select_node, Select)
     self.assertFalse(leftover)
     # six.assertRaisesRegex dispatches to whichever method name the running
     # Python provides; the assertRaisesRegexp alias is deprecated on Python 3
     # and was removed in Python 3.12.
     with six.assertRaisesRegex(self, ValueError,
                                "not aggregated or grouped by"):
         select_node.get_dataframe(group_table_context)
    def test_constructed_column_has_correct_index(self, query,
                                                  expected_result):
        # type: (str, List[List[int]]) -> None
        '''Verifies that constructed columns share the index of the table data.

        A column built by hand normally carries the default index 0, 1, 2, ...
        (pd.Series(['a', 'b', 'c']) is indexed 0, 1, 2), whereas table data
        that has been filtered, sorted or otherwise changed may carry other
        index values.  Columns whose indexes disagree cannot be compared or
        joined with each other properly.

        Args:
            query: Select statement without a FROM clause; a subquery that
                filters my_table is appended before parsing.
            expected_result: Rows the resulting dataframe must contain.
        '''
        source_frame = pd.DataFrame([[1, 2, -1], [4, 5, 6], [7, 8, 9]],
                                    columns=['a', 'b', 'c'])
        typed_table = TypedDataFrame(source_frame,
                                     types=[BQScalarType.INTEGER] * 3)
        table_context = DatasetTableContext(
            {'my_project': {'my_dataset': {'my_table': typed_table}}})

        # The subquery drops the first table row (c == -1), so the queries
        # under test see data indexed [1, 2].  A freshly built two-row column
        # would default to index [0, 1]; the mismatch exposes expressions that
        # wrongly rely on the default index.
        full_query = query + ' from (select * from my_table where c > 0)'
        parsed, remaining_tokens = select_rule(tokenize(full_query))
        assert isinstance(parsed, Select)
        typed_result, _ = parsed.get_dataframe(table_context)
        self.assertFalse(remaining_tokens)
        self.assertEqual(typed_result.to_list_of_lists(), expected_result)
        result_index = list(typed_result.dataframe.index)
        self.assertEqual(result_index, [1, 2])