Пример #1
0
    def test_create_do_with_collection_side_bigquery_write(self):
        """Check that a DoFn with an iterable BigQuery side input sees all rows.

        The map task reads the pickled ``elements`` from an in-memory source,
        runs a DoFn whose side input is a ``BigQuerySource`` tagged
        'bigquery', and writes the DoFn output into ``output_buffer``.
        ``BigQueryReader`` is patched with a mock that yields
        ``side_elements``, so no real BigQuery access takes place. The DoFn
        formats one 'element:side' string per side value, so the buffer must
        end up holding the full cross product.
        """
        elements = ['aa', 'bb']
        side_elements = ['x', 'y']
        output_buffer = []
        patch_target = 'google.cloud.dataflow.io.bigquery.BigQueryReader'
        with mock.patch(target=patch_target) as mock_class:
            # Set up the reader so it will yield the values in 'side_elements'.
            reader_mock = mock_class.return_value
            reader_mock.__enter__.return_value = reader_mock
            # A side_effect callable hands out a fresh iterator on every
            # iter() call, so multiple readers can be created, each reading
            # the entirety of the side elements.
            reader_mock.__iter__.side_effect = lambda: iter(side_elements)

            pickled_elements = [pickler.dumps(e) for e in elements]
            task_executor = executor.MapTaskExecutor()

            read_op = maptask.WorkerRead(
                inmemory.InMemorySource(
                    elements=pickled_elements,
                    start_index=0,
                    # Derived from the input instead of a hard-coded 3, which
                    # overshot the two-element list used by this test.
                    end_index=len(pickled_elements)),
                output_coders=[self.OUTPUT_CODER])

            do_op = maptask.WorkerDoFn(
                serialized_fn=pickle_with_side_inputs(
                    ptransform.CallableWrapperDoFn(
                        lambda x, side: ['%s:%s' % (x, s) for s in side]),
                    tag_and_type=('bigquery',
                                  pvalue.IterablePCollectionView, ())),
                output_tags=['out'],
                # NOTE(review): presumably (producer operation index, output
                # index), i.e. the first output of the read above - confirm.
                input=(0, 0),
                side_inputs=[
                    maptask.WorkerSideInputSource(
                        bigquery.BigQuerySource(
                            project='project',
                            dataset='dataset',
                            table='table',
                            coder=get_bigquery_source_coder()),
                        tag='bigquery'),
                ],
                output_coders=[self.OUTPUT_CODER])

            write_op = maptask.WorkerInMemoryWrite(
                output_buffer=output_buffer,
                input=(1, 0),
                output_coders=(self.OUTPUT_CODER,))

            task_executor.execute(make_map_task([read_op, do_op, write_op]))

        # The side source was specified as a collection, therefore every main
        # element must be paired with every element of the side source. The
        # buffer is sorted so the check does not depend on emission order.
        self.assertEqual(['aa:x', 'aa:y', 'bb:x', 'bb:y'],
                         sorted(output_buffer))
Пример #2
0
  def test_create_do_with_singleton_side_bigquery_write(self):
    """Check that a DoFn with a singleton BigQuery side input sees one value.

    The map task reads three pickled strings from an in-memory source, runs a
    DoFn whose side input is a ``BigQuerySource`` tagged 'bigquery' and viewed
    through ``SingletonPCollectionView``, and writes the DoFn output into an
    in-memory buffer. ``BigQueryReader`` is patched with a mock that yields
    'x', 'y', 'z'; the expected output pairs every main element with 'x' only.
    """
    main_inputs = ['abc', 'def', 'ghi']
    side_rows = ['x', 'y', 'z']
    collected = []
    reader_path = 'google.cloud.dataflow.io.bigquery.BigQueryReader'

    with mock.patch(target=reader_path) as reader_cls:
      # Every BigQueryReader instantiation returns this mock; it acts as its
      # own context manager and iterates over 'side_rows'.
      fake_reader = reader_cls.return_value
      fake_reader.__enter__.return_value = fake_reader
      # NOTE(review): return_value holds a single one-shot generator, so only
      # the first pass over the reader yields rows - confirm the singleton
      # view never needs to read the source twice.
      fake_reader.__iter__.return_value = (row for row in side_rows)

      encoded_inputs = [pickler.dumps(item) for item in main_inputs]
      task_executor = executor.MapTaskExecutor()

      # Operation 0: read the pickled main inputs.
      read_op = maptask.WorkerRead(
          inmemory.InMemorySource(
              elements=encoded_inputs, start_index=0, end_index=3),
          output_coders=[self.OUTPUT_CODER])

      # Operation 1: format each main element together with the side value.
      # NOTE(review): the (False, None) view options presumably mean
      # "no default value" - confirm against SingletonPCollectionView.
      serialized_do_fn = pickle_with_side_inputs(
          ptransform.CallableWrapperDoFn(
              lambda x, side: ['%s:%s' % (x, side)]),
          tag_and_type=(
              'bigquery', pvalue.SingletonPCollectionView, (False, None)))
      bigquery_side_input = maptask.WorkerSideInputSource(
          bigquery.BigQuerySource(
              project='project',
              dataset='dataset',
              table='table',
              coder=get_bigquery_source_coder()),
          tag='bigquery')
      do_op = maptask.WorkerDoFn(
          serialized_fn=serialized_do_fn,
          output_tags=['out'],
          input=(0, 0),
          side_inputs=[bigquery_side_input],
          output_coders=[self.OUTPUT_CODER])

      # Operation 2: collect the DoFn output in memory.
      write_op = maptask.WorkerInMemoryWrite(
          output_buffer=collected,
          input=(1, 0),
          output_coders=(self.OUTPUT_CODER,))

      task_executor.execute(make_map_task([read_op, do_op, write_op]))

    # The side source was specified as a singleton, therefore only its first
    # element should have been appended to each main element.
    self.assertEqual(['abc:x', 'def:x', 'ghi:x'], collected)