Exemplo n.º 1
0
 def copy_file(self, source_commit, source_path, dest_commit, dest_path,
               overwrite=None):
     """
     Copies a file that is already stored in PFS by issuing a single
     `CopyFile` request. Note that the destination repo cannot be an output
     repo, or the copy operation will (as of 1.9.0) silently fail.
     Params:
     * source_commit: A tuple, string, or `Commit` object representing the
     commit that holds the source file.
     * source_path: A string specifying the path of the source file.
     * dest_commit: A tuple, string, or `Commit` object representing the
     commit that receives the copy.
     * dest_path: A string specifying the path of the destination file.
     * overwrite: An optional bool specifying whether to overwrite the
     destination file if it already exists.
     """
     source_file = proto.File(commit=commit_from(source_commit),
                              path=source_path)
     dest_file = proto.File(commit=commit_from(dest_commit), path=dest_path)
     request = proto.CopyFileRequest(src=source_file,
                                     dst=dest_file,
                                     overwrite=overwrite)
     self.stub.CopyFile(request, metadata=self.metadata)
Exemplo n.º 2
0
    def list_commit(self, repo_name, to_commit=None, from_commit=None,
                    number=0):
        """
        Gets a list of CommitInfo objects for a repo.

        Params:
        * repo_name: Name of the repo to list. If only `repo_name` is given,
        all commits in the repo are returned.
        * to_commit: Optional. Only the ancestors of `to`, including `to`
        itself, are considered.
        * from_commit: Optional. Only the descendants of `from`, including
        `from` itself, are considered.
        * number: Optional. Determines how many commits are returned. If
        `number` is 0, all commits that match the aforementioned criteria are
        returned.

        Returns the `commit_info` field of the response, or an empty list
        when the response has no such attribute.
        """
        target_repo = proto.Repo(name=repo_name)
        request = proto.ListCommitRequest(repo=target_repo, number=number)

        if to_commit is not None:
            request.to.CopyFrom(commit_from(to_commit))

        if from_commit is not None:
            # `from` is a Python keyword, so the field cannot be reached with
            # plain attribute syntax.
            from_field = getattr(request, 'from')
            from_field.CopyFrom(commit_from(from_commit))

        response = self.stub.ListCommit(request, metadata=self.metadata)
        if not hasattr(response, 'commit_info'):
            return []
        return response.commit_info
Exemplo n.º 3
0
    def list_job(self, pipeline_name=None, input_commit=None, output_commit=None, history=None):
        """
        Lists jobs. Returns the `ListJobStream` response stream, which yields
        `JobInfo` objects.

        Params:

        * pipeline_name: An optional string representing a pipeline name to
        filter on.
        * input_commit: An optional list of tuples, strings, or `Commit`
        objects representing input commits to filter on. A single non-list
        value is treated as a one-element list.
        * output_commit: An optional tuple, string, or `Commit` object
        representing an output commit to filter on.
        * history: An optional int that indicates to return jobs from
          historical versions of pipelines. Semantics are:
            * 0: Return jobs from the current version of the pipeline or
              pipelines.
            * 1: Return the above and jobs from the next most recent version
            * 2: etc.
            * -1: Return jobs from all historical versions.
        """

        # Every filter stays `None` (i.e. unset on the request) unless the
        # caller supplied a value for it.
        pipeline_filter = None
        if pipeline_name is not None:
            pipeline_filter = proto.Pipeline(name=pipeline_name)

        input_filter = input_commit
        if input_filter is not None:
            if not isinstance(input_filter, list):
                input_filter = [input_filter]
            input_filter = [commit_from(entry) for entry in input_filter]

        output_filter = None
        if output_commit is not None:
            output_filter = commit_from(output_commit)

        request = proto.ListJobRequest(
            pipeline=pipeline_filter,
            input_commit=input_filter,
            output_commit=output_filter,
            history=history,
        )
        return self.stub.ListJobStream(request, metadata=self.metadata)
Exemplo n.º 4
0
 def list_job(self, pipeline_name=None, input_commit=None, output_commit=None):
     """
     Lists jobs through the unary `ListJob` RPC and returns its response.
     Params:
     * pipeline_name: An optional pipeline name; it is always wrapped in a
     `Pipeline` message, even when `None`.
     * input_commit: An optional input-commit filter. A list has each entry
     converted with `commit_from`; a single string is converted and wrapped
     in a one-element list; any other value is passed to the request
     unchanged.
     * output_commit: An optional output-commit filter; converted with
     `commit_from` only when truthy.
     """
     if isinstance(input_commit, six.string_types):
         input_commit = [commit_from(input_commit)]
     elif isinstance(input_commit, list):
         input_commit = [commit_from(entry) for entry in input_commit]

     if output_commit:
         output_commit = commit_from(output_commit)

     pipeline = proto.Pipeline(name=pipeline_name)
     request = proto.ListJobRequest(pipeline=pipeline,
                                    input_commit=input_commit,
                                    output_commit=output_commit)
     return self.stub.ListJob(request, metadata=self.metadata)
Exemplo n.º 5
0
 def walk_file(self, commit, path):
     """
     Walks over all descendant files in a directory. Returns a generator of
     `FileInfo` objects.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit.
     * path: The path to the directory.
     """
     # Convert the commit exactly once; the previous code ran `commit_from`
     # on its own result, which was redundant.
     root = proto.File(commit=commit_from(commit), path=path)
     req = proto.WalkFileRequest(file=root)
     return self.stub.WalkFile(req, metadata=self.metadata)
Exemplo n.º 6
0
    def flush_commit(self, commits, repos=tuple()):
        """
        Blocks until all of the commits which have a set of commits as
        provenance have finished. For commits to be considered they must have
        all of the specified commits as provenance. This in effect waits for
        all of the jobs that are triggered by a set of commits to complete.
        It returns an error if any of the commits it's waiting on are
        cancelled due to one of the jobs encountering an error during runtime.
        Note that it's never necessary to call FlushCommit to run jobs,
        they'll run no matter what, FlushCommit just allows you to wait for
        them to complete and see their output once they do.

        This is a generator: the request is only built and sent once
        iteration starts, and each item of the `FlushCommit` response stream
        is yielded in turn.

        Params:
        * commits: An iterable of commits to wait on; each entry is converted
        with `commit_from`.
        * repos: Optional iterable of repo names. Only the commits up to and
        including those repos will be considered, otherwise all repos are
        considered.
        """
        wait_on = [commit_from(entry) for entry in commits]
        limit_repos = [proto.Repo(name=repo_name) for repo_name in repos]
        request = proto.FlushCommitRequest(commits=wait_on,
                                           to_repos=limit_repos)

        stream = self.stub.FlushCommit(request, metadata=self.metadata)
        for commit_info in stream:
            yield commit_info
    def list_file(self, commit, path, history=None, include_contents=None):
        """
        Lists the files in a directory. Returns the `ListFileStream`
        response stream.

        Params:

        * commit: A tuple, string, or `Commit` object representing the commit.
        * path: The path to the directory.
        * history: An optional int, forwarded as the request's `history`
        field. NOTE(review): the meaning of individual values is not visible
        here - presumably how many historical versions to include, with -1
        meaning all; confirm against the PFS API.
        * include_contents: An optional bool, forwarded as the request's
        `full` field. If `True`, file contents are included.
        """

        directory = proto.File(commit=commit_from(commit), path=path)
        request = proto.ListFileRequest(file=directory,
                                        history=history,
                                        full=include_contents)
        return self.stub.ListFileStream(request, metadata=self.metadata)
Exemplo n.º 8
0
    def get_file(self, commit, path, offset_bytes=0, size_bytes=0,
                 extract_value=True):
        """
        Returns an iterator over the contents of a file at a specific Commit.

        Params:
        * commit: A tuple, string, or Commit object representing the commit.
        * path: The path of the file.
        * offset_bytes: Optional. Specifies a number of bytes that should be
        skipped in the beginning of the file.
        * size_bytes: Optional. Limits the total amount of data returned, note
        you will get fewer bytes than size if you pass a value larger than the
        size of the file. If size is set to 0 then all of the data will be
        returned.
        * extract_value: If True, the response is wrapped in an
        ExtractValueIterator, which will iterate over the bytes of the file.
        If False, the raw protobuf response iterator is returned.
        """
        target = proto.File(commit=commit_from(commit), path=path)
        request = proto.GetFileRequest(file=target,
                                       offset_bytes=offset_bytes,
                                       size_bytes=size_bytes)
        stream = self.stub.GetFile(request, metadata=self.metadata)
        if not extract_value:
            return stream
        return ExtractValueIterator(stream)
Exemplo n.º 9
0
 def glob_file(self, commit, pattern):
     """
     Looks up files matching a glob pattern through the unary `GlobFile`
     RPC. Returns the `file_info` field of the response, or an empty list
     when the response has no such attribute.
     Params:
     * commit: Passed through `commit_from` to identify the commit.
     * pattern: The glob pattern sent in the request.
     """
     request = proto.GlobFileRequest(commit=commit_from(commit),
                                     pattern=pattern)
     response = self.stub.GlobFile(request, metadata=self.metadata)
     if not hasattr(response, 'file_info'):
         return []
     return response.file_info
Exemplo n.º 10
0
 def finish_commit(self, commit, description=None, tree_object_hashes=None,
                   datum_object_hash=None, size_bytes=None, empty=None):
     """
     Ends the process of committing data to a Repo and persists the
     Commit. Once a Commit is finished the data becomes immutable and
     future attempts to write to it with PutFile will error. Returns the
     response of the `FinishCommit` RPC.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit.
     * description: An optional string describing this commit.
     * tree_object_hashes: A list of zero or more strings specifying object
     hashes; each one is wrapped in an `Object` message.
     * datum_object_hash: An optional string specifying an object hash.
     * size_bytes: An optional int.
     * empty: An optional bool. If set, the commit will be closed (its
     `finished` field will be set to the current time) but its `tree` will
     be left nil.
     """
     target = commit_from(commit)

     # Optional object references stay `None` unless a hash was supplied.
     trees = None
     if tree_object_hashes is not None:
         trees = [proto.Object(hash=digest) for digest in tree_object_hashes]

     datums = None
     if datum_object_hash is not None:
         datums = proto.Object(hash=datum_object_hash)

     request = proto.FinishCommitRequest(commit=target,
                                         description=description,
                                         trees=trees,
                                         datums=datums,
                                         size_bytes=size_bytes,
                                         empty=empty)
     return self.stub.FinishCommit(request, metadata=self.metadata)
Exemplo n.º 11
0
    def flush_commit(self, commits, repos=None):
        """
        Blocks until all of the commits which have a set of commits as
        provenance have finished. For commits to be considered they must have
        all of the specified commits as provenance. This in effect waits for
        all of the jobs that are triggered by a set of commits to complete.
        It returns an error if any of the commits it's waiting on are
        cancelled due to one of the jobs encountering an error during runtime.
        Note that it's never necessary to call FlushCommit to run jobs,
        they'll run no matter what, FlushCommit just allows you to wait for
        them to complete and see their output once they do.

        Returns the `FlushCommit` response stream, which yields `CommitInfo`
        objects.

        Params:
        * commits: A list of tuples, strings, or `Commit` objects representing
        the commits to flush.
        * repos: An optional list of strings specifying repo names. If
        specified, only commits within these repos will be flushed.
        """
        # `to_repos` is left unset (None) when no repo filter was requested.
        repo_filter = None
        if repos is not None:
            repo_filter = [proto.Repo(name=repo_name) for repo_name in repos]

        wait_on = [commit_from(entry) for entry in commits]
        request = proto.FlushCommitRequest(commits=wait_on,
                                           to_repos=repo_filter)
        return self.stub.FlushCommit(request, metadata=self.metadata)
Exemplo n.º 12
0
 def delete_commit(self, commit):
     """
     Deletes a commit by sending a `DeleteCommit` request. Nothing is
     returned.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit
     to delete.
     """
     doomed = commit_from(commit)
     request = proto.DeleteCommitRequest(commit=doomed)
     self.stub.DeleteCommit(request, metadata=self.metadata)
Exemplo n.º 13
0
    def inspect_commit(self, commit):
        """
        Returns info about a specific Commit, as produced by the
        `InspectCommit` RPC.

        Params:
        * commit: A tuple, string, or Commit object representing the commit.
        """
        target = commit_from(commit)
        request = proto.InspectCommitRequest(commit=target)
        return self.stub.InspectCommit(request, metadata=self.metadata)
Exemplo n.º 14
0
    def set_branch(self, commit, branch_name):
        """
        Sets a commit and its ancestors as a branch. Nothing is returned.

        Params:
        * commit: A tuple, string, or Commit object representing the commit.
        * branch_name: The name for the branch to set.
        """
        head = commit_from(commit)
        request = proto.SetBranchRequest(commit=head, branch=branch_name)
        self.stub.SetBranch(request, metadata=self.metadata)
Exemplo n.º 15
0
 def inspect_file(self, commit, path):
     """
     Inspects a file. Returns a `FileInfo` object.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit
     that contains the file.
     * path: A string specifying the path to the file.
     """
     target = proto.File(commit=commit_from(commit), path=path)
     request = proto.InspectFileRequest(file=target)
     return self.stub.InspectFile(request, metadata=self.metadata)
Exemplo n.º 16
0
    def glob_file(self, commit, pattern):
        """
        Lists files that match a glob pattern. Returns the `GlobFileStream`
        response stream, which yields `FileInfo` objects.
        Params:
        * commit: A tuple, string, or `Commit` object representing the commit.
        * pattern: A string representing a glob pattern.
        """

        target = commit_from(commit)
        request = proto.GlobFileRequest(commit=target, pattern=pattern)
        return self.stub.GlobFileStream(request, metadata=self.metadata)
Exemplo n.º 17
0
    def inspect_file(self, commit, path):
        """
        Returns info about a specific file, as produced by the `InspectFile`
        RPC.

        Params:
        * commit: A tuple, string, or Commit object representing the commit.
        * path: Path to file.
        """
        target = proto.File(commit=commit_from(commit), path=path)
        request = proto.InspectFileRequest(file=target)
        return self.stub.InspectFile(request, metadata=self.metadata)
Exemplo n.º 18
0
 def inspect_commit(self, commit, block_state=None):
     """
     Inspects a commit. Returns a `CommitInfo` object.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit.
     * block_state: Optional. Causes inspect commit to block until the
     commit is in the desired commit state.
     """
     target = commit_from(commit)
     request = proto.InspectCommitRequest(commit=target,
                                          block_state=block_state)
     return self.stub.InspectCommit(request, metadata=self.metadata)
Exemplo n.º 19
0
    def finish_commit(self, commit):
        """
        Ends the process of committing data to a Repo and persists the
        Commit. Once a Commit is finished the data becomes immutable and
        future attempts to write to it with PutFile will error. Returns the
        response of the `FinishCommit` RPC.

        Params:
        * commit: A tuple, string, or Commit object representing the commit.
        """
        target = commit_from(commit)
        request = proto.FinishCommitRequest(commit=target)
        return self.stub.FinishCommit(request, metadata=self.metadata)
Exemplo n.º 20
0
 def delete_file(self, commit, path):
     """
     Deletes a file from a Commit. DeleteFile leaves a tombstone in the
     Commit; assuming the file isn't written to later, attempting to get
     the file from the finished commit will result in a not found error.
     The file will of course remain intact in the Commit's parent. Nothing
     is returned.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit.
     * path: The path to the file.
     """
     doomed = proto.File(commit=commit_from(commit), path=path)
     request = proto.DeleteFileRequest(file=doomed)
     self.stub.DeleteFile(request, metadata=self.metadata)
Exemplo n.º 21
0
            def wrap(value):
                # Generator of PutFileRequest messages for a sliceable value.
                # The first message always goes out (even for an empty value)
                # and carries the destination file plus all put options along
                # with the first BUFFER_SIZE-sized slice.
                destination = proto.File(commit=commit_from(commit), path=path)
                head = value[:BUFFER_SIZE]
                yield proto.PutFileRequest(
                    file=destination,
                    value=head,
                    delimiter=delimiter,
                    target_file_datums=target_file_datums,
                    target_file_bytes=target_file_bytes,
                    overwrite_index=overwrite_index_proto)

                # Remaining slices repeat only the payload and the overwrite
                # index.
                for start in range(BUFFER_SIZE, len(value), BUFFER_SIZE):
                    stop = start + BUFFER_SIZE
                    yield proto.PutFileRequest(
                        value=value[start:stop],
                        overwrite_index=overwrite_index_proto)
Exemplo n.º 22
0
 def wrap(value):
     # Generator turning an iterable of chunks into PutFileRequest messages.
     # Only the first message names the destination file and carries the
     # put options; every later message carries just its chunk.
     for position, chunk in enumerate(value):
         if position != 0:
             yield proto.PutFileRequest(value=chunk)
             continue

         destination = proto.File(commit=commit_from(commit), path=path)
         yield proto.PutFileRequest(
             file=destination,
             value=chunk,
             delimiter=delimiter,
             target_file_datums=target_file_datums,
             target_file_bytes=target_file_bytes,
             overwrite_index=overwrite_index_proto)
Exemplo n.º 23
0
    def inspect_job(self, job_id, block_state=None, output_commit=None):
        """
        Inspects a job with a given ID. Returns a `JobInfo`.

        Params:
        * job_id: The ID of the job to inspect.
        * block_state: If true, block until the job completes.
        * output_commit: An optional tuple, string, or `Commit` object
        representing an output commit to filter on.
        """

        # The output-commit filter stays unset (None) unless one was given.
        output_filter = None
        if output_commit is not None:
            output_filter = commit_from(output_commit)

        request = proto.InspectJobRequest(
            job=proto.Job(id=job_id),
            block_state=block_state,
            output_commit=output_filter,
        )
        return self.stub.InspectJob(request, metadata=self.metadata)
Exemplo n.º 24
0
    def flush_job(self, commits, pipeline_names=None):
        """
        Blocks until all of the jobs which have a set of commits as
        provenance have finished. Returns the `FlushJob` response stream,
        which yields `JobInfo` objects.

        Params:
        * commits: A list of tuples, strings, or `Commit` objects representing
        the commits to flush.
        * pipeline_names: An optional list of strings specifying pipeline
        names. If specified, only jobs within these pipelines will be flushed.
        """

        commits = [commit_from(c) for c in commits]

        # `to_pipelines` is left unset (None) when no pipeline filter was
        # requested.
        pipelines = None
        if pipeline_names is not None:
            pipelines = [proto.Pipeline(name=name) for name in pipeline_names]

        req = proto.FlushJobRequest(commits=commits, to_pipelines=pipelines)
        # Pass the client's metadata like every other RPC here; the call
        # previously omitted it, so this request went out without it.
        return self.stub.FlushJob(req, metadata=self.metadata)
Exemplo n.º 25
0
            def wrap(value):
                # Generator that drains a file-like object in BUFFER_SIZE
                # reads and emits one PutFileRequest per non-empty read. Only
                # the first message names the destination file and carries
                # the put options; the generator ends on the first empty
                # read.
                header_pending = True
                while True:
                    chunk = value.read(BUFFER_SIZE)

                    if len(chunk) == 0:
                        return

                    if not header_pending:
                        yield proto.PutFileRequest(value=chunk)
                        continue

                    header_pending = False
                    destination = proto.File(commit=commit_from(commit),
                                             path=path)
                    yield proto.PutFileRequest(
                        file=destination,
                        value=chunk,
                        delimiter=delimiter,
                        target_file_datums=target_file_datums,
                        target_file_bytes=target_file_bytes,
                        overwrite_index=overwrite_index_proto)
Exemplo n.º 26
0
    def create_branch(self, repo_name, branch_name, commit=None, provenance=None):
        """
        Creates a new branch. Nothing is returned.

        Params:
        * repo_name: A string specifying the name of the repo.
        * branch_name: A string specifying the new branch name.
        * commit: An optional tuple, string, or `Commit` object representing
        the head commit of the branch.
        * provenance: An optional iterable of `Branch` objects representing
        the branch provenance.
        """
        new_branch = proto.Branch(repo=proto.Repo(name=repo_name),
                                  name=branch_name)

        # The head stays unset (None) unless a commit was given.
        head = None
        if commit is not None:
            head = commit_from(commit)

        request = proto.CreateBranchRequest(branch=new_branch,
                                            head=head,
                                            provenance=provenance)
        self.stub.CreateBranch(request, metadata=self.metadata)
Exemplo n.º 27
0
 def put_file_url(self, commit, path, url, recursive=None):
     """
     Puts a file using the content found at a URL. The URL is sent to the
     server which performs the request. Note that this is not a standard
     PFS function. Nothing is returned.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit.
     * path: A string specifying the path to the file.
     * url: A string specifying the url of the file to put.
     * recursive: Optional. Allow for recursive scraping of some types of
     URLs, for example on s3:// URLs.
     """
     destination = proto.File(commit=commit_from(commit), path=path)
     message = proto.PutFileRequest(file=destination,
                                    url=url,
                                    recursive=recursive)
     # PutFile takes an iterator of requests; here it holds a single message.
     requests = iter([message])
     self.stub.PutFile(requests, metadata=self.metadata)
Exemplo n.º 28
0
    def put_file_url(self, commit, path, url, recursive=False):
        """
        Puts a file using the content found at a URL. The URL is sent to the
        server which performs the request. Nothing is returned.

        Params:
        * commit: A tuple, string, or Commit object representing the commit.
        * path: The path to the file.
        * url: The url of the file to put.
        * recursive: Allow for recursive scraping of some types of URLs, for
        example on s3:// urls.
        """
        destination = proto.File(commit=commit_from(commit), path=path)
        message = proto.PutFileRequest(file=destination,
                                       url=url,
                                       recursive=recursive)
        # PutFile takes an iterator of requests; here it holds one message.
        requests = iter([message])
        self.stub.PutFile(requests, metadata=self.metadata)
Exemplo n.º 29
0
 def get_file(self, commit, path, offset_bytes=None, size_bytes=None):
     """
     Returns an iterator of the contents of a file at a specific commit.
     This is a generator: the request is only built and sent once
     iteration starts, and the `value` of each response message is yielded.
     Params:
     * commit: A tuple, string, or `Commit` object representing the commit.
     * path: A string specifying the path of the file.
     * offset_bytes: An optional int. Specifies a number of bytes that
     should be skipped in the beginning of the file.
     * size_bytes: An optional int. Limits the total amount of data
     returned, note you will get fewer bytes than size if you pass a value
     larger than the size of the file. If size is set to 0 then all of the
     data will be returned.
     """
     target = proto.File(commit=commit_from(commit), path=path)
     request = proto.GetFileRequest(file=target,
                                    offset_bytes=offset_bytes,
                                    size_bytes=size_bytes)
     stream = self.stub.GetFile(request, metadata=self.metadata)
     for message in stream:
         yield message.value
Exemplo n.º 30
0
    def list_file(self, commit, path, recursive=False):
        """
        Lists the files in a directory and returns them as a list.

        Params:
        * commit: A tuple, string, or Commit object representing the commit.
        * path: The path to the directory.
        * recursive: If True, continue listing the files for sub-directories.
        In that case the result holds this directory's `FILE` entries
        followed by the recursive listing of each `DIR` entry; directory
        entries themselves are not included.
        """
        directory = proto.File(commit=commit_from(commit), path=path)
        request = proto.ListFileRequest(file=directory)
        response = self.stub.ListFile(request, metadata=self.metadata)
        entries = response.file_info

        if not recursive:
            return list(entries)

        subdirs = [entry for entry in entries if entry.file_type == proto.DIR]
        collected = [entry for entry in entries
                     if entry.file_type == proto.FILE]
        for subdir in subdirs:
            collected = collected + self.list_file(commit, subdir.file.path,
                                                   recursive)
        return collected