Example No. 1
0
    def test_parse_pipeline_options(self):
        """Verify _parse_pipeline_options accepts both plain and URN-prefixed keys."""

        def parsed(json_text):
            # Parse the serialized options payload and return the options dict.
            return sdk_worker_main._parse_pipeline_options(
                json_text).get_all_options()

        # Both layouts below should yield the same two known options.
        expected = all_of(
            has_entry('m_m_option', ['beam_fn_api']),
            has_entry('m_option', '/tmp/requirements.txt'))

        # Plain option names nested under an "options" key.
        assert_that(
            parsed('{"options": {"m_option": "/tmp/requirements.txt", '
                   '"m_m_option":["beam_fn_api"]}}'),
            expected)

        # Top-level keys using the "beam:option:<name>:v1" URN form.
        assert_that(
            parsed('{"beam:option:m_option:v1": "/tmp/requirements.txt", '
                   '"beam:option:m_m_option:v1":["beam_fn_api"]}'),
            expected)

        # Keys that do not fully match the URN pattern are kept verbatim.
        assert_that(
            parsed('{"options": {"beam:option:m_option:v":"mock_val"}}'),
            has_entry('beam:option:m_option:v', 'mock_val'))

        assert_that(
            parsed('{"options": {"eam:option:m_option:v1":"mock_val"}}'),
            has_entry('eam:option:m_option:v1', 'mock_val'))

        assert_that(
            parsed('{"options": {"eam:option:m_option:v":"mock_val"}}'),
            has_entry('eam:option:m_option:v', 'mock_val'))
Example No. 2
0
 def test_parse_pipeline_options(self):
     """_parse_pipeline_options should agree with PipelineOptions for known flags."""
     expected = PipelineOptions([])
     mock_view = expected.view_as(SdkWorkerMainTest.MockOptions)
     mock_view.m_m_option = ['beam_fn_api']
     mock_view.m_option = '/tmp/requirements.txt'

     def parse(json_text):
         # Helper: run the parser and return the resulting options object.
         return sdk_worker_main._parse_pipeline_options(json_text)

     # Plain option names nested under "options".
     self.assertEqual(
         expected.get_all_options(),
         parse('{"options": {"m_option": "/tmp/requirements.txt", '
               '"m_m_option":["beam_fn_api"]}}').get_all_options())

     # URN-style "beam:option:<name>:v1" keys at the top level.
     self.assertEqual(
         expected.get_all_options(),
         parse('{"beam:option:m_option:v1": "/tmp/requirements.txt", '
               '"beam:option:m_m_option:v1":["beam_fn_api"]}')
         .get_all_options())

     # Keys that do not fully match the URN pattern pass through unchanged.
     self.assertEqual(
         {'beam:option:m_option:v': 'mock_val'},
         parse('{"options": {"beam:option:m_option:v":"mock_val"}}')
         .get_all_options(drop_default=True))

     self.assertEqual(
         {'eam:option:m_option:v1': 'mock_val'},
         parse('{"options": {"eam:option:m_option:v1":"mock_val"}}')
         .get_all_options(drop_default=True))

     self.assertEqual(
         {'eam:option:m_option:v': 'mock_val'},
         parse('{"options": {"eam:option:m_option:v":"mock_val"}}')
         .get_all_options(drop_default=True))
 def test_parse_pipeline_options(self):
   """Parsing should handle list-valued options and URN-prefixed keys alike."""
   expected_options = PipelineOptions([])
   mock_opts = expected_options.view_as(SdkWorkerMainTest.MockOptions)
   mock_opts.m_m_option = ['worker_threads=1', 'beam_fn_api']
   mock_opts.m_option = '/tmp/requirements.txt'

   def options_from(json_text):
     # Parse the serialized pipeline options payload.
     return sdk_worker_main._parse_pipeline_options(json_text)

   # A single list-valued option nested under "options".
   self.assertEqual(
       {'m_m_option': ['worker_threads=1']},
       options_from('{"options": {"m_m_option":["worker_threads=1"]}}')
       .get_all_options(drop_default=True))

   # Full option set under "options" matches the expected PipelineOptions.
   self.assertEqual(
       expected_options.get_all_options(),
       options_from(
           '{"options": {"m_option": "/tmp/requirements.txt", '
           '"m_m_option":["worker_threads=1", "beam_fn_api"]}}')
       .get_all_options())

   # URN-style "beam:option:<name>:v1" keys at the top level.
   self.assertEqual(
       {'m_m_option': ['worker_threads=1']},
       options_from('{"beam:option:m_m_option:v1":["worker_threads=1"]}')
       .get_all_options(drop_default=True))

   self.assertEqual(
       expected_options.get_all_options(),
       options_from(
           '{"beam:option:m_option:v1": "/tmp/requirements.txt", '
           '"beam:option:m_m_option:v1":["worker_threads=1", "beam_fn_api"]}')
       .get_all_options())

   # Keys that do not fully match the URN pattern are preserved verbatim.
   self.assertEqual(
       {'beam:option:m_option:v': 'mock_val'},
       options_from('{"options": {"beam:option:m_option:v":"mock_val"}}')
       .get_all_options(drop_default=True))

   self.assertEqual(
       {'eam:option:m_option:v1': 'mock_val'},
       options_from('{"options": {"eam:option:m_option:v1":"mock_val"}}')
       .get_all_options(drop_default=True))

   self.assertEqual(
       {'eam:option:m_option:v': 'mock_val'},
       options_from('{"options": {"eam:option:m_option:v":"mock_val"}}')
       .get_all_options(drop_default=True))
Example No. 4
0
    def _start_sdk_worker_main(
            self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
        """Start a Python SDK harness for the given StartWorkerRequest.

        The first worker thread prepares the shared execution environment
        (sys.path, working directory, os.environ); the last thread to exit
        restores it. Pipeline options and the logging/control endpoints are
        fetched from the provision service, then SdkHarness runs to
        completion.
        """
        params = start_worker_request.params
        # Hold the lock via a context manager so it is released even if the
        # environment setup below raises (the previous bare acquire()/
        # release() pair would leave the lock held forever on error).
        with self._parse_param_lock:
            # The first thread to start is responsible for preparing the
            # whole execution environment.
            if not self._ref_cnt:
                if 'PYTHONPATH' in params:
                    self._old_python_path = sys.path[:]
                    python_path_list = params['PYTHONPATH'].split(':')
                    # Reverse so the first PYTHONPATH entry ends up first
                    # on sys.path after the repeated insert(0, ...).
                    python_path_list.reverse()
                    for path in python_path_list:
                        sys.path.insert(0, path)
                if '_PYTHON_WORKING_DIR' in params:
                    self._old_working_dir = os.getcwd()
                    os.chdir(params['_PYTHON_WORKING_DIR'])
                os.environ.update(params)
            self._ref_cnt += 1

        # Read job information from the provision stub.
        metadata = [("worker_id", start_worker_request.worker_id)]
        provision_endpoint = start_worker_request.provision_endpoint.url
        with grpc.insecure_channel(provision_endpoint) as channel:
            client = ProvisionServiceStub(channel=channel)
            info = client.GetProvisionInfo(GetProvisionInfoRequest(),
                                           metadata=metadata).info
            options = json_format.MessageToJson(info.pipeline_options)
            logging_endpoint = info.logging_endpoint.url
            control_endpoint = info.control_endpoint.url

        try:
            logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                url=logging_endpoint)

            # Send all logs to the runner.
            fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
            logging.getLogger().setLevel(logging.INFO)
            # Remove the built-in log handlers so records are not duplicated
            # outside the runner channel.
            logging.getLogger().handlers = []
            logging.getLogger().addHandler(fn_log_handler)
            logging.info("Starting up Python worker in loopback mode.")
        except Exception:
            # Best-effort: the harness can still run without remote logging.
            _LOGGER.error(
                "Failed to set up logging handler, continuing without.",
                exc_info=True)
            fn_log_handler = None

        sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(options)

        _worker_id = start_worker_request.worker_id

        try:
            control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                url=control_endpoint)
            status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()

            experiments = sdk_pipeline_options.view_as(
                DebugOptions).experiments or []
            enable_heap_dump = 'enable_heap_dump' in experiments
            SdkHarness(control_address=control_service_descriptor.url,
                       status_address=status_service_descriptor.url,
                       worker_id=_worker_id,
                       state_cache_size=sdk_worker_main._get_state_cache_size(
                           experiments),
                       data_buffer_time_limit_ms=sdk_worker_main.
                       _get_data_buffer_time_limit_ms(experiments),
                       profiler_factory=profiler.Profile.factory_from_options(
                           sdk_pipeline_options.view_as(ProfilingOptions)),
                       enable_heap_dump=enable_heap_dump).run()
        except:  # pylint: disable=broad-except
            _LOGGER.exception('Python sdk harness failed: ')
            raise
        finally:
            with self._parse_param_lock:
                self._ref_cnt -= 1
                # The last thread to exit is responsible for reverting the
                # working directory and sys.path.
                if self._ref_cnt == 0:
                    if self._old_python_path is not None:
                        sys.path.clear()
                        for item in self._old_python_path:
                            sys.path.append(item)
                        self._old_python_path = None
                    if self._old_working_dir is not None:
                        os.chdir(self._old_working_dir)
                        self._old_working_dir = None
            if fn_log_handler:
                fn_log_handler.close()
Example No. 5
0
    def _start_sdk_worker_main(
            self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
        """Start a Python SDK harness for the given StartWorkerRequest.

        Applies the request's environment parameters (PYTHONPATH, working
        directory, environment variables), fetches provision info for the
        pipeline options and service endpoints, then runs SdkHarness to
        completion.
        """
        params = start_worker_request.params
        # Hold the lock via a context manager so it is released even if
        # os.chdir()/environment setup raises (the previous bare acquire()/
        # release() pair would leave the lock held forever on error).
        with self._parse_param_lock:
            if 'PYTHONPATH' in params:
                python_path_list = params['PYTHONPATH'].split(':')
                # Reverse so the first PYTHONPATH entry ends up first on
                # sys.path after the repeated insert(0, ...).
                python_path_list.reverse()
                for path in python_path_list:
                    sys.path.insert(0, path)
            if '_PYTHON_WORKING_DIR' in params:
                os.chdir(params['_PYTHON_WORKING_DIR'])
            os.environ.update(params)

        # Read job information from the provision stub.
        metadata = [("worker_id", start_worker_request.worker_id)]
        provision_endpoint = start_worker_request.provision_endpoint.url
        with grpc.insecure_channel(provision_endpoint) as channel:
            client = ProvisionServiceStub(channel=channel)
            info = client.GetProvisionInfo(GetProvisionInfoRequest(),
                                           metadata=metadata).info
            options = json_format.MessageToJson(info.pipeline_options)
            logging_endpoint = info.logging_endpoint.url
            control_endpoint = info.control_endpoint.url

        try:
            logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                url=logging_endpoint)

            # Send all logs to the runner.
            fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
            logging.getLogger().setLevel(logging.ERROR)
            logging.getLogger().addHandler(fn_log_handler)
        except Exception:
            # Best-effort: the harness can still run without remote logging.
            _LOGGER.error(
                "Failed to set up logging handler, continuing without.",
                exc_info=True)
            fn_log_handler = None

        sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(options)

        _worker_id = start_worker_request.worker_id

        try:
            control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                url=control_endpoint)
            status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()

            experiments = sdk_pipeline_options.view_as(
                DebugOptions).experiments or []
            enable_heap_dump = 'enable_heap_dump' in experiments
            SdkHarness(control_address=control_service_descriptor.url,
                       status_address=status_service_descriptor.url,
                       worker_id=_worker_id,
                       state_cache_size=sdk_worker_main._get_state_cache_size(
                           experiments),
                       data_buffer_time_limit_ms=sdk_worker_main.
                       _get_data_buffer_time_limit_ms(experiments),
                       profiler_factory=profiler.Profile.factory_from_options(
                           sdk_pipeline_options.view_as(ProfilingOptions)),
                       enable_heap_dump=enable_heap_dump).run()
        except:  # pylint: disable=broad-except
            _LOGGER.exception('Python sdk harness failed: ')
            raise
        finally:
            if fn_log_handler:
                fn_log_handler.close()