Exemplo n.º 1
0
 def test_build_query_components_when_no_pcoll_queried(self):
     """A query that names no PCollection is handed back unchanged.

     Calls ``_build_query_components`` with an empty mapping of found
     PCollections and checks that the returned query equals the input and
     that the returned SQL source is a ``beam.Pipeline`` instance.
     """
     sql = """SELECT CAST(1 AS INT) AS `id`,
                   CAST('foo' AS VARCHAR) AS `str`,
                   CAST(3.14  AS DOUBLE) AS `flt`"""
     no_pcolls = {}
     rewritten, source = _build_query_components(sql, no_pcolls)
     self.assertEqual(rewritten, sql)
     self.assertIsInstance(source, beam.Pipeline)
 def test_build_query_components_when_no_pcoll_queried(self):
     """A query that names no PCollection yields a pipeline-sourced chain.

     Calls ``_build_query_components`` with an empty mapping of found
     PCollections and the output name ``'output'``. Checks the returned
     query, the returned SQL source, and the ``source``, ``output_name``
     and ``query`` attributes of the returned chain's current node.
     """
     sql = """SELECT CAST(1 AS INT) AS `id`,
                   CAST('foo' AS VARCHAR) AS `str`,
                   CAST(3.14  AS DOUBLE) AS `flt`"""
     no_pcolls = {}
     rewritten, source, sql_chain = _build_query_components(
         sql, no_pcolls, 'output')
     # The query and the source must come back untouched / as a pipeline.
     self.assertEqual(rewritten, sql)
     self.assertIsInstance(source, beam.Pipeline)
     # The chain's current node must describe the same query.
     self.assertIsInstance(sql_chain.current.source, beam.Pipeline)
     self.assertEqual('output', sql_chain.current.output_name)
     self.assertEqual(sql, sql_chain.current.query)
Exemplo n.º 3
0
    def test_build_query_components_when_unbounded_pcolls_queried(self):
        """A PCollection read from Pub/Sub is returned as the SQL source.

        ``pcolls_from_streaming_cache`` in ``beam_sql_magics`` is patched to
        return the mapping of found PCollections, then the second element
        returned by ``_build_query_components`` is checked to be the very
        same ``pcoll`` object.
        """
        p = beam.Pipeline()
        pcoll = p | beam.io.ReadFromPubSub(
            subscription='projects/fake-project/subscriptions/fake_sub')
        # locals() is passed as-is, so the names bound above (`p`, `pcoll`)
        # are what ib.watch receives; the query below refers to `pcoll`.
        ib.watch(locals())
        sql = 'SELECT * FROM pcoll'
        name_to_pcoll = {'pcoll': pcoll}
        patched_name = (
            'apache_beam.runners.interactive.sql.beam_sql_magics.'
            'pcolls_from_streaming_cache')

        with patch(patched_name, lambda a, b, c: name_to_pcoll):
            _, source = _build_query_components(sql, name_to_pcoll)
            self.assertIs(source, pcoll)
Exemplo n.º 4
0
    def test_build_query_components_when_single_pcoll_queried(self):
        """A single queried PCollection is referred to as PCOLLECTION.

        ``pcoll_from_file_cache`` in ``beam_sql_magics`` is patched to return
        ``target``. The returned query must have the name ``target``
        replaced by ``PCOLLECTION`` and the returned SQL source must be a
        ``beam.PCollection``.
        """
        p = beam.Pipeline()
        target = p | beam.Create([1, 2, 3])
        # locals() is passed as-is, so the names bound above (`p`, `target`)
        # are what ib.watch receives; the query below refers to `target`.
        ib.watch(locals())
        sql = 'SELECT * FROM target where a=1'
        name_to_pcoll = {'target': target}
        patched_name = (
            'apache_beam.runners.interactive.sql.beam_sql_magics.'
            'pcoll_from_file_cache')

        with patch(patched_name, lambda a, b, c, d: target):
            rewritten, source = _build_query_components(sql, name_to_pcoll)

            self.assertEqual(
                rewritten, 'SELECT * FROM PCOLLECTION where a=1')
            self.assertIsInstance(source, beam.PCollection)
Exemplo n.º 5
0
    def test_build_query_components_when_multiple_pcolls_queried(self):
        """Several queried PCollections come back as a name-keyed dict.

        ``pcoll_from_file_cache`` in ``beam_sql_magics`` is patched to return
        ``pcoll_1``. The returned query must equal the input query and the
        returned SQL source must be a dict containing both PCollection
        names.
        """
        p = beam.Pipeline()
        pcoll_1 = p | 'Create 1' >> beam.Create([1, 2, 3])
        pcoll_2 = p | 'Create 2' >> beam.Create([4, 5, 6])
        # locals() is passed as-is, so the names bound above are what
        # ib.watch receives; the query below refers to both PCollections.
        ib.watch(locals())
        sql = 'SELECT * FROM pcoll_1 JOIN pcoll_2 USING (a)'
        name_to_pcoll = {'pcoll_1': pcoll_1, 'pcoll_2': pcoll_2}
        patched_name = (
            'apache_beam.runners.interactive.sql.beam_sql_magics.'
            'pcoll_from_file_cache')

        with patch(patched_name, lambda a, b, c, d: pcoll_1):
            rewritten, source = _build_query_components(sql, name_to_pcoll)

            self.assertEqual(rewritten, sql)
            self.assertIsInstance(source, dict)
            self.assertIn('pcoll_1', source)
            self.assertIn('pcoll_2', source)
    def test_build_query_components_when_single_pcoll_queried(self):
        """A single queried PCollection is referred to as PCOLLECTION.

        ``unreify_from_cache`` in ``beam_sql_magics`` is patched to return
        ``target``. Checks the rewritten query, the type of the returned SQL
        source, and the ``source``, ``query`` and ``output_name`` attributes
        of the returned chain's current node.
        """
        p = beam.Pipeline()
        target = p | beam.Create([1, 2, 3])
        # locals() is passed as-is, so the names bound above (`p`, `target`)
        # are what ib.watch receives; the query below refers to `target`.
        ib.watch(locals())
        sql = 'SELECT * FROM target where a=1'
        name_to_pcoll = {'target': target}
        patched_name = (
            'apache_beam.runners.interactive.sql.beam_sql_magics.'
            'unreify_from_cache')

        with patch(
                patched_name,
                lambda pipeline, cache_key, cache_manager, element_type:
                target):
            rewritten, source, sql_chain = _build_query_components(
                sql, name_to_pcoll, 'output')
            expected_sql = 'SELECT * FROM PCOLLECTION where a=1'
            self.assertEqual(expected_sql, rewritten)
            self.assertIsInstance(source, beam.PCollection)
            # The chain's current node must describe the rewritten query.
            self.assertIn('target', sql_chain.current.source)
            self.assertEqual(expected_sql, sql_chain.current.query)
            self.assertEqual('output', sql_chain.current.output_name)
    def test_build_query_components_when_multiple_pcolls_queried(self):
        """Several queried PCollections come back as a name-keyed dict.

        ``unreify_from_cache`` in ``beam_sql_magics`` is patched to return
        ``pcoll_1``. Checks that the query is returned unchanged, that the
        SQL source is a dict holding both names, and that the returned
        chain's current node carries the same sources, query and output
        name.
        """
        p = beam.Pipeline()
        pcoll_1 = p | 'Create 1' >> beam.Create([1, 2, 3])
        pcoll_2 = p | 'Create 2' >> beam.Create([4, 5, 6])
        # locals() is passed as-is, so the names bound above are what
        # ib.watch receives; the query below refers to both PCollections.
        ib.watch(locals())
        sql = 'SELECT * FROM pcoll_1 JOIN pcoll_2 USING (a)'
        name_to_pcoll = {'pcoll_1': pcoll_1, 'pcoll_2': pcoll_2}
        patched_name = (
            'apache_beam.runners.interactive.sql.beam_sql_magics.'
            'unreify_from_cache')

        with patch(
                patched_name,
                lambda pipeline, cache_key, cache_manager, element_type:
                pcoll_1):
            rewritten, source, sql_chain = _build_query_components(
                sql, name_to_pcoll, 'output')

            self.assertEqual(rewritten, sql)
            self.assertIsInstance(source, dict)
            self.assertIn('pcoll_1', source)
            self.assertIn('pcoll_2', source)
            # The chain's current node must describe the same query.
            self.assertIn('pcoll_1', sql_chain.current.source)
            self.assertIn('pcoll_2', sql_chain.current.source)
            self.assertEqual(sql, sql_chain.current.query)
            self.assertEqual('output', sql_chain.current.output_name)