Exemplo n.º 1
0
def create_test_pipeline(client: python_pachyderm.Client, test_name):
    """Build an input repo and a copy pipeline for a test, then commit a file.

    A random six-character suffix is shared by the input repo name and the
    pipeline name. The pipeline runs ``sh`` in the ``alpine`` image and copies
    every ``*.dat`` file from the input repo's mount point to ``/pfs/out/``.
    After the pipeline is registered, ``file.dat`` is written to the input
    repo's ``master`` branch inside a single commit.

    Args:
        client: Pachyderm client used for every call made here.
        test_name: Name passed to the repo-naming helpers.

    Returns:
        A tuple ``(commit, input_repo_name, pipeline_repo_name)``.
    """
    suffix = random_string(6)
    # NOTE(review): presumably create_test_repo also creates the repo on the
    # cluster and returns its name -- confirm against the helper.
    source_repo = create_test_repo(
        client, test_name, prefix="input", suffix=suffix
    )
    pipeline_name = test_repo_name(test_name, prefix="pipeline", suffix=suffix)

    # Shell script fed to `sh` on stdin by the pipeline's transform.
    copy_script = "cp /pfs/{}/*.dat /pfs/out/".format(source_repo)
    transform = pps_proto.Transform(
        cmd=["sh"],
        image="alpine",
        stdin=[copy_script],
    )
    pfs_input = pps_proto.PfsInput(glob="/*", repo=source_repo)
    client.create_pipeline(
        pipeline_name,
        transform=transform,
        input=pps_proto.Input(pfs=pfs_input),
    )

    # TODO: figure out what is actually happening here
    with client.commit(source_repo, "master") as input_commit:
        client.put_file_bytes(input_commit, "file.dat", b"DATA")

    # The commit object is still bound after the `with` block exits.
    return (input_commit, source_repo, pipeline_name)
Exemplo n.º 2
0
def main():
    """Poll a message queue forever and store each message in the spout repo.

    Every non-empty batch returned by ``receive_message`` is written inside a
    single commit on the ``master`` branch of the ``spout`` repo. Each message
    becomes one file named after the SHA-256 hex digest of its content, with a
    ``.txt`` extension. This function never returns.
    """
    print("connecting to pachd")
    client = Client()
    print("connected")

    while True:
        # Poll the queue; nothing to commit when the batch is empty.
        batch = receive_message()
        if not batch:
            continue

        with client.commit("spout", "master") as open_commit:
            for payload in batch:
                # Content hash gives each message a unique, stable file name.
                digest = hashlib.sha256(payload).hexdigest()
                client.put_file_bytes(open_commit, digest + ".txt", payload)