def test_file_retrieval(self):
        """Checks resolving and chunked fetching of file-backed artifacts."""
        files = InMemoryFileManager({
            'path/to/a': b'a',
            'path/to/b': b'b' * 37,
        })
        service = artifact_service.ArtifactRetrievalService(
            files.file_reader, chunk_size=10)
        small_dep = self.file_artifact('path/to/a')

        # The test expects a file artifact to resolve to itself.
        resolved = service.ResolveArtifacts(
            beam_artifact_api_pb2.ResolveArtifactsRequest(
                artifacts=[small_dep]))
        self.assertEqual(
            resolved,
            beam_artifact_api_pb2.ResolveArtifactsResponse(
                replacements=[small_dep]))

        # A one-byte file is expected to arrive as a single response.
        small_chunks = list(
            service.GetArtifact(
                beam_artifact_api_pb2.GetArtifactRequest(artifact=small_dep)))
        self.assertEqual(
            small_chunks,
            [beam_artifact_api_pb2.GetArtifactResponse(data=b'a')])

        # 37 bytes read with chunk_size=10 are expected as 10 + 10 + 10 + 7.
        large_dep = self.file_artifact('path/to/b')
        large_chunks = list(
            service.GetArtifact(
                beam_artifact_api_pb2.GetArtifactRequest(artifact=large_dep)))
        expected_chunks = [
            beam_artifact_api_pb2.GetArtifactResponse(data=b'b' * size)
            for size in (10, 10, 10, 7)
        ]
        self.assertEqual(large_chunks, expected_chunks)
 def test_embedded_retrieval(self):
     """Checks that an embedded artifact comes back as one response."""
     service = artifact_service.ArtifactRetrievalService(None)
     artifact = self.embedded_artifact(b'some_data')
     request = beam_artifact_api_pb2.GetArtifactRequest(artifact=artifact)
     responses = list(service.GetArtifact(request))
     expected = [beam_artifact_api_pb2.GetArtifactResponse(data=b'some_data')]
     self.assertEqual(responses, expected)
 def test_url_retrieval(self):
     """Checks that a ``file:`` URL artifact yields this file's bytes."""
     service = artifact_service.ArtifactRetrievalService(None)
     url_payload = beam_runner_api_pb2.ArtifactUrlPayload(
         url='file:' + quote(__file__))
     artifact = beam_runner_api_pb2.ArtifactInformation(
         type_urn=common_urns.artifact_types.URL.urn,
         type_payload=url_payload.SerializeToString())
     request = beam_artifact_api_pb2.GetArtifactRequest(artifact=artifact)
     # Concatenate the streamed pieces back into the full file content.
     fetched = b''.join(
         response.data for response in service.GetArtifact(request))
     with open(__file__, 'rb') as source:
         expected = source.read()
     self.assertEqual(fetched, expected)
示例#4
0
def store_artifact(artifact, service, dest_dir):
  """Streams ``artifact`` from ``service`` into a new file under ``dest_dir``.

  The data is written to a named temporary file that is kept after closing
  (``delete=False``), and a SHA-256 digest is computed over the bytes as
  they are written.

  Args:
    artifact: the artifact to fetch; its role urn and payload are copied
      onto the result.
    service: object whose ``GetArtifact(request)`` yields messages with a
      ``data`` attribute.
    dest_dir: directory in which the temporary file is created.

  Returns:
    An ``ArtifactInformation`` of FILE type whose payload carries the path
    of the written file and the hex SHA-256 of its contents.
  """
  digest = hashlib.sha256()
  with tempfile.NamedTemporaryFile(dir=dest_dir, delete=False) as tmp_file:
    request = beam_artifact_api_pb2.GetArtifactRequest(artifact=artifact)
    for chunk in service.GetArtifact(request):
      data = chunk.data
      digest.update(data)
      tmp_file.write(data)
  file_payload = beam_runner_api_pb2.ArtifactFilePayload(
      path=tmp_file.name, sha256=digest.hexdigest())
  return beam_runner_api_pb2.ArtifactInformation(
      type_urn=common_urns.artifact_types.FILE.urn,
      type_payload=file_payload.SerializeToString(),
      role_urn=artifact.role_urn,
      role_payload=artifact.role_payload)
示例#5
0
def resolve_as_files(retrieval_service, file_writer, dependencies):
  """Translates a set of dependencies into file-based dependencies.

  This is a generator: nothing is resolved or fetched until it is iterated.

  Args:
    retrieval_service: object exposing ``ResolveArtifacts(request)``, whose
      result has a ``replacements`` field, and ``GetArtifact(request)``,
      which yields messages with a ``data`` attribute.
    file_writer: callable taking a file name and returning a
      ``(file_handle, path)`` pair; the handle is used as a context manager
      and written to, and ``path`` is recorded in the yielded artifact.
    dependencies: the artifacts to resolve and fetch.

  Yields:
    One FILE-typed ``ArtifactInformation`` per resolved dependency, pointing
    at the path returned by ``file_writer`` and carrying the dependency's
    original role urn and payload.
  """
  # Resolve once, with no preferred urns, and fetch whatever comes back.
  resolution = retrieval_service.ResolveArtifacts(
      beam_artifact_api_pb2.ResolveArtifactsRequest(
          artifacts=dependencies,
          # Anything fetchable will do.
          # TODO(robertwb): Take advantage of shared filesystems, urls.
          preferred_urns=[],
      ))
  dependencies = resolution.replacements

  # Fetch each of the dependencies, using file_writer to store them as
  # file-based artifacts.
  # TODO(robertwb): Consider parallelizing the actual writes.
  for dep in dependencies:
    if dep.role_urn == common_urns.artifact_roles.STAGING_TO.urn:
      # Keep the staged file's base name so the stored name stays readable.
      base_name = os.path.basename(
          proto_utils.parse_Bytes(
              dep.role_payload,
              beam_runner_api_pb2.ArtifactStagingToRolePayload).staged_name)
    else:
      base_name = None
    # The name is the hash of the serialized artifact, followed by the
    # base name when there is one; filter(None, ...) drops a missing or
    # empty base name so no trailing '-' is produced.
    unique_name = '-'.join(
        filter(
            None,
            [hashlib.sha256(dep.SerializeToString()).hexdigest(), base_name]))
    file_handle, path = file_writer(unique_name)
    with file_handle as fout:
      for chunk in retrieval_service.GetArtifact(
          beam_artifact_api_pb2.GetArtifactRequest(artifact=dep)):
        fout.write(chunk.data)
    yield beam_runner_api_pb2.ArtifactInformation(
        type_urn=common_urns.artifact_types.FILE.urn,
        type_payload=beam_runner_api_pb2.ArtifactFilePayload(
            path=path).SerializeToString(),
        role_urn=dep.role_urn,
        role_payload=dep.role_payload)
示例#6
0
 def retrieve_artifact(retrieval_service, retrieval_token, name):
   """Fetches the named artifact and returns its chunks joined as bytes."""
   request = beam_artifact_api_pb2.GetArtifactRequest(
       retrieval_token=retrieval_token, name=name)
   pieces = [
       response.data for response in retrieval_service.GetArtifact(request)
   ]
   return b''.join(pieces)