def run_small_gridcat(session):
    """Build and submit a small all-pairs cat/md5sum task grid.

    Ten random blobs of 5000 characters are created; every ordered pair is
    concatenated with ``cat``, hashed with ``md5sum`` and reduced to the
    first whitespace-separated token.  All tokens are concatenated and
    hashed once more, and that final output is kept and fetched.
    """
    blob_size = 5000
    blob_count = 10
    rnd = test_rnd()

    cat = Program(("cat", Input("input1"), Input("input2")), stdout="output")
    md5sum = Program("md5sum", stdin="input", stdout="output")

    @remote()
    def take_first(ctx, data):
        # Keep only the first token of the md5sum output line.
        return data.get_bytes().split()[0]

    # One random string per blob; characters are drawn sequentially from rnd.
    consts = [
        blob("".join(rnd.choice(CHARS) for _ in range(blob_size)))
        for _ in range(blob_count)
    ]

    digests = []
    for left in consts:
        for right in consts:
            joined = cat(input1=left, input2=right)
            hashed = md5sum(input=joined)
            digests.append(take_first(hashed).output)

    result = md5sum(input=tasks.Concat(digests))
    result.output.keep()
    # session.pending_graph().write("/home/spirali/tmp/graph.dot")
    session.submit()
    # NOTE(review): the comparison result is discarded (there is no assert),
    # so this line only forces the fetch -- confirm whether that is intended.
    result.output.fetch() == b"0a9612a2e855278d336a9e1a1589478f -\n"
def test_execute_outputs(test_env):
    """Check stdout content types of ``tasks.execute`` for several input styles."""
    payload = ["1", 2.0, {'a': 42}]
    test_env.start(1)
    with test_env.client.new_session() as session:
        # Output declared without any content type.
        untyped = tasks.execute(
            ["cat", Input("somefile", dataobj=pickled(payload))],
            stdout=Output())
        untyped.output.keep()

        # Content type fixed on the Output given at instantiation.
        typed_by_output = tasks.execute(
            ["cat", Input("somefile", dataobj=pickled(payload))],
            stdout=Output(content_type='pickle'))
        typed_by_output.output.keep()

        # Data passed through stdin instead of a file argument.
        typed_via_stdin = tasks.execute(
            ["cat"],
            stdin=Input("somefile", dataobj=pickled(payload)),
            stdout=Output(content_type='pickle'))
        typed_via_stdin.output.keep()

        # Input without an explicit label (automatic naming).
        typed_auto_named = tasks.execute(
            ["cat", Input(dataobj=pickled(payload))],
            stdout=Output(content_type='pickle'))
        typed_auto_named.output.keep()

        session.submit()

        # Every output typed as 'pickle' must load back to the original object.
        for task in (typed_by_output, typed_via_stdin, typed_auto_named):
            assert task.output.fetch().load() == payload

        # The untyped output has no content type and cannot be loaded.
        assert untyped.output.content_type is None
        with pytest.raises(RainException):
            untyped.output.fetch().load()
def test_program_write_input(test_env):
    """An input declared with ``write=True`` may be overwritten by the task.

    The second task rewrites its input file ``x``; the first task's output
    must still hold the original bytes afterwards.
    """
    test_env.start(1)
    with test_env.client.new_session() as session:
        source = blob(b"abc")

        passthrough = tasks.Execute(
            "ls",
            input_paths=[Input("x", dataobj=source)],
            output_paths=["x"],
        )
        original = passthrough.output
        original.keep()

        overwriting = tasks.Execute(
            "echo 'xyz' > x",
            input_paths=[Input("x", dataobj=original, write=True)],
            output_paths=["x"],
            shell=True,
        )
        rewritten = overwriting.output
        rewritten.keep()

        session.submit()
        assert rewritten.fetch().get_bytes() == b"xyz\n"
        assert original.fetch().get_bytes() == b"abc"
def test_program_link_readonly(test_env):
    """Writing to an input not declared ``write=True`` must fail the task."""
    test_env.start(1)
    with test_env.client.new_session() as session:
        source = blob(b"abc")
        producer = tasks.Execute(
            "ls",
            input_paths=[Input("x", dataobj=source)],
            output_paths=["x"],
        )
        producer.output.keep()
        session.submit()
        session.wait_all()

        # The shell redirect targets the task's own input path "x", which is
        # passed here without write=True.
        tasks.Execute(
            "ls > x",
            input_paths=[Input("x", dataobj=producer.output)],
            output_paths=["x"],
            shell=True,
        )
        session.submit()
        with pytest.raises(TaskException):
            session.wait_all()
def test_program_input_file(test_env):
    """Feed a blob to a ``Program`` as an input file and grep it."""
    test_env.start(1)
    grep_ab = Program(("/bin/grep", "ab", Input("in1")), stdout="output")
    with test_env.client.new_session() as session:
        text = blob("abc\nNOTHING\nabab")
        task = grep_ab(in1=text)
        matched = task.output
        matched.keep()
        session.submit()
        # Only the lines containing "ab" survive.
        assert matched.fetch().get_bytes() == b"abc\nabab\n"
def test_cpp_hello_file(test_env):
    """Pass a file-backed output of ``ls`` to the ``cpp_hello`` task."""
    test_env.start(1, executor="cpptester")
    with test_env.client.new_session() as session:
        name = blob("WORLD")
        # Route the blob through an external program so it comes out as the
        # output path "d1".
        staged = tasks.execute(
            "ls",
            input_paths=[Input("d1", dataobj=name)],
            output_paths=[Output("d1")],
        )
        greeting = cpp_hello(staged.output)
        greeting.output.keep()
        session.submit()
        assert greeting.output.fetch().get_bytes() == b"Hello WORLD!"
def test_hello_file(self, test_env):
    """Pass a file-backed output of ``ls`` to this suite's hello task."""
    self.start(test_env)
    with test_env.client.new_session() as session:
        name = blob("WORLD")
        # Route the blob through an external program so it comes out as the
        # output path "d1".
        staged = tasks.Execute(
            "ls",
            input_paths=[Input("d1", dataobj=name)],
            output_paths=[Output("d1")],
        )
        greeting = self.task_hello(staged.output)
        greeting.output.keep()
        session.submit()
        assert greeting.output.fetch().get_bytes() == b"Hello WORLD!"
def test_execute_input_file(test_env):
    """Feed a blob to ``tasks.Execute`` as an input file and grep it."""
    test_env.start(1)
    with test_env.client.new_session() as session:
        text = blob("abc\nNOTHING\nabab")
        command = ("/bin/grep", "ab", Input("in1", dataobj=text))
        task = tasks.Execute(command, stdout="output")
        matched = task.output
        matched.keep()
        session.submit()
        # Only the lines containing "ab" survive.
        assert matched.fetch().get_bytes() == b"abc\nabab\n"
def test_program_outputs(test_env):
    """Specify program content type on spec and instantiation.

    Covers four ways of running ``cat`` over a pickled object:

    * ``t1a`` -- pickled input, untyped program output,
    * ``t1b`` -- output content type given at instantiation (check disabled,
      see TODO below),
    * ``t1c`` -- plain blob of pickle bytes, no content type anywhere,
    * ``t2``  -- content type fixed by the ``Program`` spec.
    """
    obj = ["1", 2.0, {'a': 42}]
    program1 = Program(["cat", Input("i")], stdout="o")
    program2 = Program(["cat", Input("i", content_type='pickle')],
                       stdout=Output(content_type='pickle'))

    test_env.start(1)
    with test_env.client.new_session() as s:
        # Dynamic content-type, forgotten by cat
        t1a = program1(i=pickled(obj))
        t1a.output.keep()
        # Static content-type by instantiation
        t1b = program1(i=pickled(obj), output=Output(content_type='pickle'))
        t1b.output.keep()
        # No content type
        t1c = program1(i=blob(pickle.dumps(obj)))
        t1c.output.keep()
        # Static content-type by Program spec
        t2 = program2(i=pickled(obj))
        t2.output.keep()

        s.submit()

        # Untyped output: loading must fail, raw bytes are still the pickle.
        assert t1a.output.content_type is None
        with pytest.raises(RainException):
            t1a.output.fetch().load()
        assert t1a.output.fetch().get_bytes() == pickle.dumps(obj)

        # TODO(gavento): Needs OutputSpec and Output merging
        # assert t1b.output.fetch().load() == obj

        # Same expectations for the task fed with an untyped blob.  The bytes
        # check previously re-tested t1a by mistake, leaving t1c's payload
        # unverified.
        assert t1c.output.content_type is None
        with pytest.raises(RainException):
            t1c.output.fetch().load()
        assert t1c.output.fetch().get_bytes() == pickle.dumps(obj)

        assert t2.output.fetch().load() == obj
def test_python_datainstance_write(test_env):
    """Data instances written to disk inside a remote task can be modified.

    The remote function writes a blob and a directory to the working
    directory, rewrites and deletes the files, and creates a new
    subdirectory.  It is run once on client-created objects and once on the
    outputs of an external ``ls`` task.
    """

    @remote()
    def remote_fn(ctx, input1, input2):
        input1.write("test1")
        input2.write("test2")

        # (path, expected initial content, replacement content)
        cases = (
            ("test1", "Data 1", "New data 1"),
            ("test2/file", "Data 2", "New data 2"),
        )
        for path, initial, replacement in cases:
            with open(path) as f:
                assert f.read() == initial
            with open(path, "w") as f:
                f.write(replacement)
            with open(path) as f:
                assert f.read() == replacement

        os.mkdir("test2/testdir")
        os.unlink("test1")
        os.unlink("test2/file")
        return b""

    test_env.start(1)
    with test_env.client.new_session() as session:
        d1 = blob(b"Data 1")

        os.mkdir("dir")
        with open("dir/file", "w") as f:
            f.write("Data 2")
        d2 = directory("dir")

        # First round: objects created directly by the client.
        remote_fn(d1, d2)
        session.submit()
        session.wait_all()

        # Second round: the same data routed through an external program.
        relay = tasks.Execute(
            "ls",
            input_paths=[Input("d1", dataobj=d1), InputDir("d2", dataobj=d2)],
            output_paths=[Output("d1"), OutputDir("d2")],
        )
        remote_fn(relay.outputs["d1"], relay.outputs["d2"])
        session.submit()
        session.wait_all()
def test2(ctx,
          in1: Input(content_type='json'),
          in2: Input(content_type='pickle', load=True),
          in3: Input(load=True),  # expects input tuple (pickle(42.0), "foo")
          in4,  # static type 'json'
          in5,  # No type and no input, only python objects,
          *args: Input(content_type='text', load=False),
          ina: Input(load=True, content_type='cbor') ="bar",  # for 'ina'
          **kwargs: Input(load=True)  # dynamic types, different
          ) -> 0:
    """Forward all arguments unchanged to ``test_gen``.

    The input specifications are attached as parameter annotations
    (``Input(...)`` objects) rather than passed separately; ``in4`` and
    ``in5`` carry no annotation.

    NOTE(review): the ``-> 0`` return annotation presumably declares zero
    outputs -- confirm against the task framework.
    """
    # All checks live in test_gen; this wrapper only supplies the signature.
    test_gen(ctx, in1, in2, in3, in4, in5, *args, ina=ina, **kwargs)
def test_py_pass_through(test_env):
    """A remote function may return its inputs unchanged as named outputs."""

    @remote(outputs=("out1", "out2"))
    def test(ctx, data1, data2):
        return dict(out1=data1, out2=data2)

    test_env.start(1)

    cat = Program("/bin/cat input1",
                  stdout="output",
                  input_paths=[Input("input1")])

    with test_env.client.new_session() as session:
        payload = b"ABC" * 10000
        produced = cat(input1=blob(payload))
        forwarded = test(produced, blob("Hello!"))

        first = forwarded.outputs["out1"]
        second = forwarded.outputs["out2"]
        first.keep()
        second.keep()

        session.submit()
        assert payload == first.fetch().get_bytes()
        assert b"Hello!" == second.fetch().get_bytes()
def test_input_detailed_specs(test_env):
    """Check per-input content types and load flags of remote functions.

    The same input specification is declared twice -- once through the
    ``inputs=`` argument of ``@remote`` and once through parameter
    annotations -- and both variants are run against identical arguments.
    """
    json_dict = {'A': 2, 'B': [4, 5]}
    pickled_list = [1.0, 2.0, True]
    json_list = ["a", "b"]
    plain_obj = {"object": 5}

    def check_inputs(ctx, in1, in2, in3, in4, in5, *args, ina="bar", **kwargs):
        # Shared assertions executed inside both remote variants.
        assert isinstance(in1, DataInstance)
        assert in1.load() == json_dict
        assert in2 == pickled_list
        assert in3 == (42.0, "foo")
        assert in4.load() == json_list
        assert in5 == plain_obj
        assert len(args) == 3
        for item in args:
            assert item.content_type == "text-latin2"
            assert item.load() == "ňů"
        assert ina == "barbar"
        for letter in ("A", "B", "C", "D"):
            assert kwargs['kw' + letter] == [letter]

    @remote(
        inputs={
            'in1': Input(content_type='json'),
            'in2': Input(content_type='pickle', load=True),
            # expects input tuple (pickle(42.0), "foo")
            'in3': Input(load=True),
            # 'in4' only static type 'json'
            # 'in5' has no type and no dataobject input, only python objects
            'args': Input(content_type='text', load=False),
            'ina': Input(load=True, content_type='cbor'),
            # dynamic types, different
            'kwargs': Input(load=True),
        },
        outputs=0)
    def test1(ctx, in1, in2, in3, in4, in5, *args, ina="bar", **kwargs):
        check_inputs(ctx, in1, in2, in3, in4, in5, *args, ina=ina, **kwargs)

    @remote()
    def test2(ctx,
              in1: Input(content_type='json'),
              in2: Input(content_type='pickle', load=True),
              # expects input tuple (pickle(42.0), "foo")
              in3: Input(load=True),
              # static type 'json'
              in4,
              # No type and no input, only python objects
              in5,
              *args: Input(content_type='text', load=False),
              # for 'ina'
              ina: Input(load=True, content_type='cbor') = "bar",
              # dynamic types, different
              **kwargs: Input(load=True)
              ) -> 0:
        check_inputs(ctx, in1, in2, in3, in4, in5, *args, ina=ina, **kwargs)

    @remote()
    def copied(ctx, obj):
        "simply copy the blob, but does not provide static type info"
        return obj

    test_env.start(1)
    for variant in (test1, test2):
        with test_env.client.new_session() as session:
            # Arguments are built in the same order the call would evaluate
            # them: positionals first, then keywords.
            positional = [
                copied(blob(json_dict, encode='json')),
                blob(pickle.dumps(pickled_list)),
                (pickled(42.0), "foo"),
                blob(json_list, encode='json'),
                plain_obj,
            ]
            for _ in range(3):
                positional.append(blob("ňů", encode="text-latin2"))
            keyword = {
                'ina': blob("barbar", encode='cbor'),
                'kwA': pickled(["A"]),
                'kwB': blob(["B"], encode="json"),
                'kwC': blob(["C"], encode="cbor"),
                'kwD': blob(["D"], encode="arrow"),
            }
            task = variant(*positional, **keyword)
            session.submit()
            task.wait()