示例#1
0
    def test_heterogeneous_configs(self):
        """Run three differently-shaped configs through one ``multi_load`` call.

        The configs cover dataframe -> dataset, query -> dataframe and
        query -> bucket. The test expects one result slot per config, with
        ``None`` in the slots of the two configs whose destination is not a
        dataframe, and then reads each destination back to compare contents.
        """
        expected1 = pandas.DataFrame(data={'x': [3, 10]})
        expected2 = pandas.DataFrame(data={'y': [4]})
        expected3 = pandas.DataFrame(data={'x': ['b'], 'y': ['a']})
        populate()
        config1 = LoadConfig(source='dataframe',
                             destination='dataset',
                             dataframe=expected1,
                             data_name='a10')
        config2 = LoadConfig(source='query',
                             destination='dataframe',
                             query='select 4 as y')
        config3 = LoadConfig(source='query',
                             destination='bucket',
                             query="select 'b' as x, 'a' as y",
                             data_name='a11')
        gpl = create_loader(bucket_dir_path=constants.bucket_subdir_path)
        load_results = gpl.multi_load([config1, config2, config3])
        self.assertEqual(len(load_results), 3)
        # assertIsNone reports the unexpected value on failure, whereas
        # assertTrue(x is None) only reports "False is not true".
        self.assertIsNone(load_results[0])
        self.assertIsNone(load_results[2])

        # Config 1: the dataframe should now be readable from table 'a10'.
        computed1 = load.dataset_to_dataframe('a10')
        self.assert_pandas_equal(expected1, computed1)

        # Config 2: the only config with a dataframe destination.
        computed2 = load_results[1]
        self.assert_pandas_equal(expected2, computed2)

        # Config 3: read the exported blob back from the bucket.
        # NOTE(review): the '-000000000000.csv.gz' suffix is presumably the
        # first shard name produced by the export -- confirm against the loader.
        blob_name = ids.build_blob_name_2('a11-000000000000.csv.gz')
        computed3 = load.bucket_to_dataframe(blob_name, decompress=True)
        self.assert_pandas_equal(expected3, computed3)
示例#2
0
 def test_write_disposition_default_bucket_to_dataset(self):
     """Load blob 's10' into the dataset twice without a write disposition.

     After both loads, table 's10' must equal the single uploaded frame.
     """
     source_frame = pandas.DataFrame({'x': [1]})
     load.dataframe_to_bucket(source_frame, ids.build_blob_name_2('s10'))
     loader = create_loader_quick_setup(
         bucket_dir_path=constants.bucket_subdir_path,
         local_dir_path=None,
     )
     # No write_disposition is passed: the loader's default is under test.
     load_kwargs = dict(source='bucket',
                        destination='dataset',
                        data_name='s10')
     for _attempt in range(2):
         loader.load(**load_kwargs)
     self.assert_pandas_equal(source_frame,
                              load.dataset_to_dataframe('s10'))
示例#3
0
 def test_dataframe_to_dataset(self):
     """Upload a two-column dataframe as table 'a1' and read it back."""
     source_frame = pandas.DataFrame({'x': [1, 2, 3], 'y': [1, 2, 4]})
     populate()
     loader = create_loader_quick_setup()
     load_kwargs = {
         'source': 'dataframe',
         'destination': 'dataset',
         'dataframe': source_frame,
         'data_name': 'a1',
     }
     loader.load(**load_kwargs)
     round_tripped = load.dataset_to_dataframe('a1')
     self.assert_pandas_equal(source_frame, round_tripped)
示例#4
0
 def test_write_empty_local_to_dataset(self):
     """Load local file 's12' into the dataset with WRITE_EMPTY.

     The frame is first written to the local path built for 's12', then
     loaded from 'local' to 'dataset' and compared with the original.
     """
     source_frame = pandas.DataFrame({'x': [1]})
     load.dataframe_to_local(source_frame,
                             ids.build_local_file_path_1('s12'))
     loader = create_loader(local_dir_path=constants.local_subdir_path)
     loader.load(
         source='local',
         destination='dataset',
         data_name='s12',
         write_disposition='WRITE_EMPTY',
     )
     self.assert_pandas_equal(source_frame,
                              load.dataset_to_dataframe('s12'))
示例#5
0
 def test_write_truncate_query_to_dataset(self):
     """Run the same query -> dataset load twice with WRITE_TRUNCATE.

     Table 's11' must hold exactly one row after the second load.
     """
     single_row = pandas.DataFrame({'x': [1]})
     loader = create_loader_quick_setup(bucket_name=None, local_dir_path=None)
     load_kwargs = dict(source='query',
                        destination='dataset',
                        query='select 1 as x',
                        data_name='s11',
                        write_disposition='WRITE_TRUNCATE')
     for _attempt in range(2):
         loader.load(**load_kwargs)
     table_contents = load.dataset_to_dataframe('s11')
     self.assert_pandas_equal(single_row, table_contents)
示例#6
0
 def test_query_to_dataset(self):
     """Materialise a two-row query as table 'a0' and read it back.

     The loader is created with ``gs_client=None`` and ``bucket_name=None``.
     """
     want = pandas.DataFrame({'x': [3, 2], 'y': ['a', 'b']})
     populate_dataset()
     loader = create_loader(gs_client=None, bucket_name=None)
     sql = "select 3 as x, 'a' as y union all select 2 as x, 'b' as y"
     loader.load(source='query',
                 destination='dataset',
                 query=sql,
                 data_name='a0')
     got = load.dataset_to_dataframe('a0')
     self.assert_pandas_equal(want, got)
示例#7
0
 def test_bucket_to_dataset(self):
     """Load bucket data named 'a' into the dataset with an explicit schema.

     The resulting table is expected to hold the strings 'a7_bucket'
     through 'a11_bucket' in a single STRING column 'x'.
     """
     bucket_values = [f'a{i}_bucket' for i in range(7, 12)]
     want = pandas.DataFrame({'x': bucket_values})
     populate_dataset()
     populate_bucket()
     loader = create_loader_quick_setup(local_dir_path=None)
     string_column = bigquery.SchemaField(name='x', field_type='STRING')
     loader.load(source='bucket',
                 destination='dataset',
                 data_name='a',
                 bq_schema=[string_column])
     got = load.dataset_to_dataframe('a')
     self.assert_pandas_equal(want, got)
示例#8
0
 def test_download_upload(self):
     """Round-trip: query -> dataframe, then that dataframe -> table 'b1'."""
     want = pandas.DataFrame({'x': [3, 2]})
     loader = create_loader(
         bucket_dir_path=constants.bucket_dir_path,
         local_dir_path=constants.local_subdir_path,
     )
     # Step 1: pull the query result down as a dataframe.
     downloaded = loader.load(
         source='query',
         destination='dataframe',
         query='select 3 as x union all select 2 as x',
     )
     # Step 2: push that same dataframe back up as table 'b1'.
     loader.load(
         source='dataframe',
         destination='dataset',
         dataframe=downloaded,
         data_name='b1',
     )
     got = load.dataset_to_dataframe('b1')
     self.assert_pandas_equal(want, got)
示例#9
0
 def test_write_append_dataframe_to_dataset(self):
     """Load one frame into 's13', then append a second with WRITE_APPEND.

     The table is expected to contain the rows of both frames.
     """
     want = pandas.DataFrame({'x': [0, 1]})
     first_part = pandas.DataFrame({'x': [0]})
     second_part = pandas.DataFrame({'x': [1]})
     loader = create_loader(chunk_size=2**18, timeout=5)
     # Initial load: no write_disposition given.
     loader.load(
         source='dataframe',
         destination='dataset',
         dataframe=first_part,
         data_name='s13',
     )
     # Second load targets the same table, with WRITE_APPEND.
     loader.load(
         source='dataframe',
         destination='dataset',
         dataframe=second_part,
         data_name='s13',
         write_disposition='WRITE_APPEND',
     )
     got = load.dataset_to_dataframe('s13')
     self.assert_pandas_equal(want, got)