Exemplo n.º 1
0
    def test_to_dataframe_error_if_pandas_is_none(self):
        """Check that ``to_dataframe`` raises ``ValueError`` without pandas.

        A ``RowIterator`` is built over a mocked API request returning two
        (name, age) rows.  The ``pandas`` name inside
        ``google.cloud.bigquery.table`` is replaced with ``None`` while
        ``to_dataframe`` is called, so the test exercises the "pandas is
        missing" path regardless of whether pandas is installed.
        """
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        schema = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        rows = [
            {'f': [{'v': 'Phred Phlyntstone'}, {'v': '32'}]},
            {'f': [{'v': 'Bharney Rhubble'}, {'v': '33'}]},
        ]
        path = '/foo'
        api_request = mock.Mock(return_value={'rows': rows})
        row_iterator = RowIterator(
            mock.sentinel.client, api_request, path, schema)

        # Without this patch the test only passes on machines where pandas
        # happens to be absent.
        # NOTE(review): assumes the table module binds its optional
        # dependency to a module-level name ``pandas`` -- confirm.  If the
        # attribute does not exist, ``mock.patch`` raises AttributeError, so
        # a wrong assumption fails loudly rather than silently.
        with mock.patch('google.cloud.bigquery.table.pandas', new=None):
            with self.assertRaises(ValueError):
                row_iterator.to_dataframe()
Exemplo n.º 2
0
    def test_to_dataframe(self):
        """Convert four REQUIRED (name, age) rows and inspect the result.

        Verifies the returned object's type, its row count, the column
        order, and the dtype name reported for each of the two columns.
        """
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        people = (
            ('Phred Phlyntstone', '32'),
            ('Bharney Rhubble', '33'),
            ('Wylma Phlyntstone', '29'),
            ('Bhettye Rhubble', '27'),
        )
        # Each API row wraps its cell values as {'f': [{'v': ...}, ...]}.
        payload = {
            'rows': [
                {'f': [{'v': person}, {'v': age}]}
                for person, age in people
            ],
        }
        fields = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        fake_request = mock.Mock(return_value=payload)
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        # Row count.
        self.assertEqual(len(frame), 4)
        # Column names, in schema order.
        self.assertEqual(list(frame), ['name', 'age'])
        self.assertEqual(frame['name'].dtype.name, 'object')
        self.assertEqual(frame['age'].dtype.name, 'int64')
Exemplo n.º 3
0
    def test_to_dataframe_w_empty_results(self):
        """Convert an empty result set and inspect the resulting DataFrame.

        The mocked API request returns no rows; the test expects a
        DataFrame of length zero whose columns still follow the schema.
        """
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('name', 'STRING', mode='REQUIRED'),
            SchemaField('age', 'INTEGER', mode='REQUIRED'),
        ]
        empty_payload = {'rows': []}
        fake_request = mock.Mock(return_value=empty_payload)
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        # No rows came back from the API.
        self.assertEqual(len(frame), 0)
        # Column names are still present, in schema order.
        self.assertEqual(list(frame), ['name', 'age'])
Exemplo n.º 4
0
    def test_to_dataframe_w_various_types_nullable(self):
        """Convert rows of mixed column types where the first row is all null.

        Verifies the DataFrame's type, row count and column names, then
        walks the rows: the first must be entirely null, and every later
        row must hold values of the expected Python / pandas types.
        """
        import datetime
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('start_timestamp', 'TIMESTAMP'),
            SchemaField('seconds', 'INT64'),
            SchemaField('miles', 'FLOAT64'),
            SchemaField('payment_type', 'STRING'),
            SchemaField('complete', 'BOOL'),
            SchemaField('date', 'DATE'),
        ]
        records = [
            [None, None, None, None, None, None],
            ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
            ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
            ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
        ]
        # Wrap every cell as {'v': value} and every record as {'f': [...]}.
        api_rows = []
        for record in records:
            api_rows.append({'f': [{'v': cell} for cell in record]})
        fake_request = mock.Mock(return_value={'rows': api_rows})
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        # Row count.
        self.assertEqual(len(frame), 4)
        # Column names, in schema order.
        expected_names = [field.name for field in fields]
        self.assertEqual(list(frame), expected_names)

        for position, record in frame.iterrows():
            if position == 0:
                # The first input record was made up of None values only.
                self.assertTrue(record.isnull().all())
                continue

            self.assertIsInstance(
                record['start_timestamp'], pandas.Timestamp)
            self.assertIsInstance(record['seconds'], float)
            self.assertIsInstance(record['payment_type'], str)
            self.assertIsInstance(record['complete'], bool)
            self.assertIsInstance(record['date'], datetime.date)
Exemplo n.º 5
0
    def test_to_dataframe_column_dtypes(self):
        """Convert fully-populated rows and check each column's dtype name.

        Verifies the DataFrame's type, row count and column names, then
        compares the dtype name of every column with its expected value.
        """
        from google.cloud.bigquery.table import RowIterator
        from google.cloud.bigquery.table import SchemaField

        fields = [
            SchemaField('start_timestamp', 'TIMESTAMP'),
            SchemaField('seconds', 'INT64'),
            SchemaField('miles', 'FLOAT64'),
            SchemaField('payment_type', 'STRING'),
            SchemaField('complete', 'BOOL'),
            SchemaField('date', 'DATE'),
        ]
        records = [
            ['1.4338368E9', '420', '1.1', 'Cash', 'true', '1999-12-01'],
            ['1.3878117E9', '2580', '17.7', 'Cash', 'false', '1953-06-14'],
            ['1.3855653E9', '2280', '4.4', 'Credit', 'true', '1981-11-04'],
        ]
        # Wrap every cell as {'v': value} and every record as {'f': [...]}.
        api_rows = []
        for record in records:
            api_rows.append({'f': [{'v': cell} for cell in record]})
        fake_request = mock.Mock(return_value={'rows': api_rows})
        iterator = RowIterator(
            mock.sentinel.client, fake_request, '/foo', fields)

        frame = iterator.to_dataframe()

        self.assertIsInstance(frame, pandas.DataFrame)
        # Row count.
        self.assertEqual(len(frame), 3)
        # Column names, in schema order.
        expected_names = [field.name for field in fields]
        self.assertEqual(list(frame), expected_names)

        # Checked in the same order as the schema; the first mismatch fails.
        expected_dtypes = (
            ('start_timestamp', 'datetime64[ns, UTC]'),
            ('seconds', 'int64'),
            ('miles', 'float64'),
            ('payment_type', 'object'),
            ('complete', 'bool'),
            ('date', 'object'),
        )
        for column, dtype_name in expected_dtypes:
            self.assertEqual(frame[column].dtype.name, dtype_name)