Python DataSource示例，purplequery.join.DataSource Python示例

示例#1

0

显示文件

    def test_data_source_join_overlapping_fields(self):
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table':
                    TypedDataFrame(
                        pd.DataFrame([[1, 9]], columns=['a', 'b']),
                        types=[BQScalarType.INTEGER, BQScalarType.INTEGER]),
                    'my_table2':
                    TypedDataFrame(
                        pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'd']),
                        types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
                }
            }
        })
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_type = 'INNER'
        join_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        join_on = EMPTY_NODE
        joins = [(join_type, join_table, join_on)]

        data_source = DataSource(initial_table, joins)
        context = data_source.create_context(table_context)

        self.assertEqual(context.table.to_list_of_lists(), [[1, 9, 1, 2]])
        self.assertEqual(
            list(context.table.dataframe),
            ['my_table.a', 'my_table.b', 'my_table2.a', 'my_table2.d'])

示例#2

0

显示文件

    def test_data_source(self):
        data_source = DataSource((TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE), [])
        data_source_context = data_source.create_context(self.table_context)

        self.assertEqual(data_source_context.table.to_list_of_lists(),
                         [[1], [2]])
        self.assertEqual(list(data_source_context.table.dataframe),
                         ['my_table.a'])
        self.assertEqual(data_source_context.table.types,
                         [BQScalarType.INTEGER])

示例#3

0

显示文件

    def test_select(self):
        # type: () -> None
        from_ = DataSource((TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE), [])
        selector = StarSelector(EMPTY_NODE, EMPTY_NODE, EMPTY_NODE)
        select = Select(EMPTY_NODE, [selector], from_, EMPTY_NODE, EMPTY_NODE,
                        EMPTY_NODE)
        dataframe, table_name = select.get_dataframe(self.table_context)

        self.assertEqual(table_name, None)
        self.assertEqual(dataframe.to_list_of_lists(), [[1], [2], [3]])
        self.assertEqual(list(dataframe.dataframe), ['a'])
        self.assertEqual(dataframe.types, [BQScalarType.INTEGER])

示例#4

0

显示文件

    def test_query_expression_limit(self):
        # type: () -> None
        from_ = DataSource((TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE), [])
        selector = StarSelector(EMPTY_NODE, EMPTY_NODE, EMPTY_NODE)
        select = Select(EMPTY_NODE, [selector], from_, EMPTY_NODE, EMPTY_NODE,
                        EMPTY_NODE)

        limit = Value(1, BQScalarType.INTEGER)
        offset = Value(1, BQScalarType.INTEGER)
        qe = QueryExpression(EMPTY_NODE, select, EMPTY_NODE, (limit, offset))
        dataframe, table_name = qe.get_dataframe(self.table_context)

        self.assertEqual(dataframe.to_list_of_lists(), [[2]])

示例#5

0

显示文件

    def test_data_source_join_multiple_joins(self):
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table':
                    TypedDataFrame(pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                                                columns=['a', 'b1', 'c1']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ]),
                    'my_table2':
                    TypedDataFrame(pd.DataFrame([[1, 8, 9], [0, 7, 2]],
                                                columns=['a', 'b', 'c2']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ]),
                    'my_table3':
                    TypedDataFrame(pd.DataFrame([[3, 4, 5], [6, 7, 8]],
                                                columns=['a3', 'b', 'c3']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ])
                }
            }
        })
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_type = 'FULL'
        join_table2 = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        join_table3 = (TableReference(
            ('my_project', 'my_dataset', 'my_table3')), EMPTY_NODE)
        joins = [(join_type, join_table2, ('a', )),
                 (join_type, join_table3, ('b', ))]
        data_source = DataSource(initial_table, joins)
        context = data_source.create_context(table_context)

        result = [[1, 2, 3, 1, 8, 9, None, None, None],
                  [4, 5, 6, None, None, None, None, None, None],
                  [None, None, None, 0, 7, 2, 6, 7, 8],
                  [None, None, None, None, None, None, 3, 4, 5]]
        self.assertEqual(context.table.to_list_of_lists(), result)

示例#6

0

显示文件

    def test_data_source_join_error(
            self,
            join_type,  # type: str
            join_on,  # type: ConditionsType
            error_type,  # type: Type[BaseException]
            error  # type: str
    ):
        # type: (...) -> None
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        joins = [Join(join_type, join_table, join_on)]

        data_source = DataSource(initial_table, joins)
        with self.assertRaisesRegexp(error_type, error):
            data_source.create_context(self.table_context)

示例#7

0

显示文件

    def test_select_where(self, where):
        # type: (EvaluatableNode) -> None
        where_table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table':
                    TypedDataFrame(
                        pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b']),
                        types=[BQScalarType.INTEGER, BQScalarType.INTEGER])
                }
            }
        })

        fields = [Selector(Field(('a', )), EMPTY_NODE)]
        from_ = DataSource((TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE), [])
        select = Select(EMPTY_NODE, fields, from_, where, EMPTY_NODE,
                        EMPTY_NODE)
        dataframe, table_name = select.get_dataframe(where_table_context)

        self.assertEqual(dataframe.to_list_of_lists(), [[3]])

示例#8

0

显示文件

    def test_data_source_join_multiple_columns(self):
        table_context = DatasetTableContext({
            'my_project': {
                'my_dataset': {
                    'my_table':
                    TypedDataFrame(pd.DataFrame([[1, 2, 3], [1, 5, 6]],
                                                columns=['a', 'b', 'c']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ]),
                    'my_table2':
                    TypedDataFrame(pd.DataFrame([[1, 2, 7], [3, 2, 8]],
                                                columns=['a', 'b', 'd']),
                                   types=[
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER,
                                       BQScalarType.INTEGER
                                   ])
                }
            }
        })
        initial_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table')), EMPTY_NODE)
        join_type = 'FULL'
        join_table = (TableReference(
            ('my_project', 'my_dataset', 'my_table2')), EMPTY_NODE)
        join_on = ('a', 'b')
        joins = [(join_type, join_table, join_on)]

        data_source = DataSource(initial_table, joins)
        context = data_source.create_context(table_context)

        result = [[1, 2, 3, 1, 2, 7], [1, 5, 6, None, None, None],
                  [None, None, None, 3, 2, 8]]
        self.assertEqual(context.table.to_list_of_lists(), result)