Example #1
def to_runner_api_parameter(self, context):
    # Sort the output tags so that the order is deterministic and we are able
    # to test equality on a roundtrip through the to/from proto apis.
    return (common_urns.primitives.TEST_STREAM.urn,
            beam_runner_api_pb2.TestStreamPayload(
                coder_id=context.coders.get_id(self.coder),
                events=[e.to_runner_api(self.coder) for e in self._events],
                endpoint=ApiServiceDescriptor(url=self._endpoint)))
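
Example #1 serializes a Beam TestStream transform into its runner API payload; the test stream endpoint is wrapped in an ApiServiceDescriptor so the payload carries the service URL. As a minimal standalone sketch of the message itself (the import path below is an assumption and differs between Beam releases):

# Minimal sketch: constructing an ApiServiceDescriptor directly.
# NOTE: the import path is assumed; some Beam releases expose the message
# under a longer generated package path.
from apache_beam.portability.api import endpoints_pb2

descriptor = endpoints_pb2.ApiServiceDescriptor(url="localhost:50051")
print(descriptor.url)  # -> localhost:50051
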
Example #2
    check_not_empty(worker_id, "No id provided.")
    check_not_empty(provision_endpoint, "No provision endpoint provided.")

    logging.info("Initializing Python harness: %s" % " ".join(sys.argv))

    if 'PYFLINK_LOOPBACK_SERVER_ADDRESS' in os.environ:
        logging.info("Starting up Python harness in loopback mode.")

        params = dict(os.environ)
        params.update({'SEMI_PERSISTENT_DIRECTORY': semi_persist_dir})
        with grpc.insecure_channel(
                os.environ['PYFLINK_LOOPBACK_SERVER_ADDRESS']) as channel:
            client = BeamFnExternalWorkerPoolStub(channel=channel)
            request = StartWorkerRequest(
                worker_id=worker_id,
                provision_endpoint=ApiServiceDescriptor(
                    url=provision_endpoint),
                params=params)
            client.StartWorker(request)
    else:
        logging.info("Starting up Python harness in a standalone process.")
        metadata = [("worker_id", worker_id)]

        # read job information from provision stub
        with grpc.insecure_channel(provision_endpoint) as channel:
            client = ProvisionServiceStub(channel=channel)
            info = client.GetProvisionInfo(GetProvisionInfoRequest(),
                                           metadata=metadata).info
            options = json_format.MessageToJson(info.pipeline_options)
            logging_endpoint = info.logging_endpoint.url
            control_endpoint = info.control_endpoint.url
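
Example #2 bootstraps a PyFlink Python harness: in loopback mode it asks an external worker pool to start the worker, passing the provision endpoint as an ApiServiceDescriptor; otherwise it queries the provision service for the pipeline options and the logging/control endpoints, converting the options proto to JSON. That proto/JSON conversion can be sketched on its own with an ApiServiceDescriptor (import path assumed, as above):

# Hedged sketch of the json_format round trip used above, applied to an
# ApiServiceDescriptor rather than the provision info (import path assumed).
from google.protobuf import json_format
from apache_beam.portability.api import endpoints_pb2

descriptor = endpoints_pb2.ApiServiceDescriptor(url="localhost:50051")
as_json = json_format.MessageToJson(descriptor)
restored = json_format.Parse(as_json, endpoints_pb2.ApiServiceDescriptor())
assert restored.url == descriptor.url
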
Example #3
                metadata=metadata)
            with open(file_path, "wb") as f:
                sha256obj = hashlib.sha256()
                for artifact_chunk in stream:
                    sha256obj.update(artifact_chunk.data)
                    f.write(artifact_chunk.data)
                hash_value = sha256obj.hexdigest()
            if hash_value != sha256:
                raise Exception("The sha256 hash value: %s of the downloaded file: %s is not the"
                                " same as the expected hash value: %s" %
                                (hash_value, file_path, sha256))
            # reinterpret the permission digits as an octal file mode before chmod
            os.chmod(file_path, int(str(permissions), 8))

    pip_install_requirements()

    os.environ["WORKER_ID"] = worker_id
    os.environ["PIPELINE_OPTIONS"] = options
    os.environ["SEMI_PERSISTENT_DIRECTORY"] = semi_persist_dir
    os.environ["LOGGING_API_SERVICE_DESCRIPTOR"] = text_format.MessageToString(
        ApiServiceDescriptor(url=logging_endpoint))
    os.environ["CONTROL_API_SERVICE_DESCRIPTOR"] = text_format.MessageToString(
        ApiServiceDescriptor(url=control_endpoint))
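    # NOTE (assumption, not part of the original snippet): the SDK worker
    # launched below reads these variables back with
    # google.protobuf.text_format, roughly:
    #   text_format.Parse(os.environ["LOGGING_API_SERVICE_DESCRIPTOR"],
    #                     ApiServiceDescriptor())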

    env = dict(os.environ)

    if "FLINK_BOOT_TESTING" in os.environ and os.environ["FLINK_BOOT_TESTING"] == "1":
        exit(0)

    call([python_exec, "-m", "pyflink.fn_execution.sdk_worker_main"],
         stdout=sys.stdout, stderr=sys.stderr, env=env)
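
Example #3 completes the bootstrap: each downloaded artifact is hashed chunk by chunk and checked against its expected SHA-256 digest, the worker configuration is exported through environment variables (the logging and control endpoints as text-format ApiServiceDescriptor protos), and finally the SDK worker process is launched. The streaming hash check is easy to reproduce in isolation:

# Self-contained sketch of the chunked SHA-256 verification used above.
import hashlib

def digest_of_chunks(chunks):
    sha256obj = hashlib.sha256()
    for chunk in chunks:  # feed the digest incrementally, chunk by chunk
        sha256obj.update(chunk)
    return sha256obj.hexdigest()

expected = hashlib.sha256(b"hello world").hexdigest()
assert digest_of_chunks([b"hello ", b"world"]) == expected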