async def run(self):
    # Make a corresponding list of path objects in case any of the targets
    # we are given are paths on disk rather than Git repo URLs
    paths = [
        pathlib.Path(target_name).expanduser().resolve()
        for target_name in self.targets
    ]
    # Run all the operations. Each iteration of this loop happens
    # when all inputs are exhausted for a context; the output
    # operations are then run and their results are yielded
    async for target_name, results in run(
        DATAFLOW,
        {
            # For each target add a new input set to the input network.
            # The context that operations execute under is the target name
            # being evaluated. Contexts ensure that data pertaining to
            # target A doesn't mingle with data pertaining to target B
            target_name: [
                # The only input is the target: either an already-on-disk
                # repo, or a URL for clone_git_repo to clone
                Input(
                    value={
                        "URL": "file://" + str(path),
                        "directory": str(path),
                    },
                    definition=clone_git_repo.op.outputs["repo"],
                )
                if path.is_dir()
                else Input(
                    value=target_name,
                    definition=clone_git_repo.op.inputs["URL"],
                )
            ]
            for target_name, path in zip(self.targets, paths)
        },
    ):
        print(results)
async def test_gen(self):
    operation_qualname = "ops:echo_strings"
    async with self.make_dataflow(
        ECHO_STRINGS,
        [operation_qualname, "get_multi"],
        ["ops:echo_strings.outputs.result,=get_multi_spec"],
    ) as dataflow:
        # Make sure the operation is in the dataflow
        self.assertIn(operation_qualname, dataflow.operations)
        # Definitions for shorthand access
        idef = dataflow.operations[operation_qualname].inputs["input_string"]
        odef = dataflow.operations[operation_qualname].outputs["result"]
        # Run the dataflow
        async for ctx_str, results in run(
            dataflow,
            [
                Input(
                    value="Irregular at magic school",
                    definition=idef,
                )
            ],
        ):
            self.assertIn(odef.name, results)
            self.assertListEqual(
                results[odef.name],
                [
                    f"Echo({i}): Irregular at magic school"
                    for i in range(0, 5)
                ],
            )
async def run(self):
    # Run all the operations. Each iteration of this loop happens
    # when all inputs are exhausted for a context; the output
    # operations are then run and their results are yielded
    async for package_name, results in run(
        DATAFLOW,
        {
            # For each package add a new input set to the input network.
            # The context that operations execute under is the package name
            # being evaluated. Contexts ensure that data pertaining to
            # package A doesn't mingle with data pertaining to package B
            package_name: [
                # The only input to the operations is the package name
                Input(
                    value=package_name,
                    definition=pypi_package_json.op.inputs["package"],
                )
            ]
            for package_name in self.packages
        },
    ):
        # Grab the number of safety issues and the bandit report
        # from the results dict
        safety_issues = results[safety_check.op.outputs["issues"].name]
        bandit_report = results[run_bandit.op.outputs["report"].name]
        # Decide if those numbers mean we should stop ship or not
        if (
            safety_issues > 0
            or bandit_report["CONFIDENCE.HIGH_AND_SEVERITY.HIGH"] > 5
        ):
            print(f"Do not install {package_name}!")
            for definition_name, result in results.items():
                print(f" {definition_name}: {result}")
        else:
            print(f"{package_name} is okay to install")
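# The per-context pattern used in the two run() methods above can be seen
# end to end in a much smaller program. A minimal sketch, assuming only the
# public dffml API; the shout operation and its definitions below are
# hypothetical, made up purely for illustration:
import asyncio

from dffml import DataFlow, Definition, GetSingle, Input, op, run

Word = Definition(name="word", primitive="string")
Shouted = Definition(name="shouted", primitive="string")


@op(inputs={"word": Word}, outputs={"shouted": Shouted})
async def shout(word: str) -> dict:
    # Trivial stand-in operation: upper-case the input
    return {"shouted": word.upper()}


async def main():
    dataflow = DataFlow.auto(shout, GetSingle)
    # Ask the GetSingle output operation to collect the shouted definition
    dataflow.seed.append(
        Input(value=[Shouted.name], definition=GetSingle.op.inputs["spec"])
    )
    # One input set per context; results for "hello" never mingle with
    # results for "world", just as package A never mingles with package B
    async for ctx, results in run(
        dataflow,
        {
            word: [Input(value=word, definition=Word)]
            for word in ["hello", "world"]
        },
    ):
        print(ctx, results)


asyncio.run(main())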
async def run_dataflow(dataflow):
    async for ctx, results in run(
        dataflow,
        [
            Input(
                value=[fail_and_retry.op.outputs["result"].name],
                definition=GetSingle.op.inputs["spec"],
            ),
        ],
    ):
        yield results
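# Since run_dataflow above is itself an async generator, a caller has to
# drive it with async for. A minimal usage sketch (the surrounding event
# loop setup is assumed):
async def caller(dataflow):
    async for results in run_dataflow(dataflow):
        print(results)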
async def test_extract_zip_op(self):
    dataflow = create_dataflow(
        extract_zip_archive,
        {
            "input_file_path": self.test_file_pth,
            "output_directory_path": self.test_dir_pth,
        },
    )
    m_open = mock_open()
    with patch("io.open", m_open), patch("zipfile._EndRecData"), patch(
        "zipfile.ZipFile._RealGetContents"
    ):
        async for _, _ in run(dataflow):
            m_open.assert_called_once_with(self.test_file_pth, "rb")
async def test_make_tar_archive_op(self):
    dataflow = create_dataflow(
        make_tar_archive,
        {
            "input_directory_path": self.test_dir_pth,
            "output_file_path": self.test_file_pth,
        },
    )
    m_open = mock_open()
    with patch("tarfile.bltn_open", m_open), patch("tarfile.TarFile.close"):
        async for _, _ in run(dataflow):
            m_open.assert_called_once_with(self.test_file_pth, "xb")
async def test_make_zip_op(self):
    dataflow = create_dataflow(
        make_zip_archive,
        {
            "input_directory_path": self.test_dir_pth,
            "output_file_path": self.test_file_pth,
        },
    )
    m_open = mock_open()
    with patch("io.open", m_open), patch("zipfile.ZipFile._write_end_record"):
        async for _, _ in run(dataflow):
            m_open.assert_called_once_with(self.test_file_pth, "w+b")
async def test_extract_tar_op(self):
    dataflow = create_dataflow(
        extract_tar_archive,
        {
            "input_file_path": self.test_file_pth,
            "output_directory_path": self.test_dir_pth,
        },
    )
    m_open = mock_open()
    with patch("builtins.open", m_open), patch(
        "tarfile.TarFile.extractall"
    ), patch("tarfile.TarInfo.fromtarfile", m_open):
        async for _, _ in run(dataflow):
            m_open.assert_any_call("test/path/to/tar_file.tar", "rb")
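# The archive tests above all lean on a create_dataflow() helper that is not
# shown here. A sketch of what such a helper might look like, under the
# assumption that it wires a single operation plus GetSingle into a DataFlow
# and seeds the operation's inputs from the given dict:
from dffml import DataFlow, GetSingle, Input


def create_dataflow(operation, seed: dict) -> DataFlow:
    dataflow = DataFlow.auto(operation, GetSingle)
    dataflow.seed = [
        # Feed each value to the matching input definition of the operation
        Input(value=value, definition=operation.op.inputs[name])
        for name, value in seed.items()
    ] + [
        # Collect every output the operation produces
        Input(
            value=[definition.name for definition in operation.op.outputs.values()],
            definition=GetSingle.op.inputs["spec"],
        )
    ]
    return dataflow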
async def test_inflate_gz(self):
    dataflow = create_dataflow(
        gz_decompress,
        {
            "input_file_path": self.compressed_file_pth(".gz"),
            "output_file_path": self.uncomressed_file_pth,
        },
    )
    m_open = mock_open()
    with patch("builtins.open", m_open()), patch(
        "gzip.open", m_open()
    ), patch("shutil.copyfileobj"):
        async for _, _ in run(dataflow):
            m_open.assert_has_calls(self.get_inflation_mock_calls(".gz"))
async def main():
    bot_config = GitterChannelConfig(INISecret(filename="configs.ini"))
    dataflow = DataFlow(
        operations={x.op.name: x for x in OPERATIONS},
        implementations={x.op.name: x.imp for x in OPERATIONS},
        configs={x.op.name: bot_config for x in OPERATIONS},
    )
    room_name = "test_community1/community"
    dataflow.seed = [
        Input(value=room_name, definition=get_room_id.op.inputs["room_uri"])
    ]
    async for ctx, result in run(dataflow):
        pass
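# The main() coroutine above never starts itself; the usual entry point
# (assumed here, since the snippet does not show one) would be:
import asyncio

if __name__ == "__main__":
    asyncio.run(main())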
async def main():
    # Clear the file so we overwrite with new data
    repos_json_path.write_text("[]")
    # Create and enter our sources (__aenter__()) following the Double Context
    # Entry pattern (see the tutorial page for more details)
    async with OrgsReposYAMLSource(
        directory=pathlib.Path(__file__).parent.joinpath("orgs")
    ) as input_source, SAPPortalReposJSONSource(
        filename=repos_json_path,
        readwrite=True,
    ) as output_source:
        # Second context entry
        async with input_source() as input_source_ctx, output_source() as output_source_ctx:
            # Run the dataflow
            async for ctx, results in dffml.run(
                dataflow,
                {
                    # We will run the dataflow on all input repos at the same
                    # time. The dataflow will run on each repo / record
                    # concurrently. We do this by creating a dictionary where
                    # each key is an InputSetContext, a RecordInputSetContext
                    # to be exact, since the context for each run is tied to
                    # the record / repo.
                    dffml.RecordInputSetContext(record): [
                        # Create a list of Inputs for each record's context.
                        # The only input we add at this time is the URL of
                        # the repo.
                        dffml.Input(
                            value=record.key,
                            definition=dataflow.definitions["github.repo.url"],
                        )
                    ]
                    async for record in input_source_ctx.records()
                },
                strict=False,
            ):
                # Update the feature data of the record. The feature data is
                # what we are writing out to repos.json in the source we
                # implemented.
                ctx.record.evaluated(results)
                # Print results for debugging purposes
                print(ctx.record.export())
                # Save to output repos.json
                await output_source_ctx.update(ctx.record)
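# The "Double Context Entry" pattern referenced above reduces to the skeleton
# below, which applies to any dffml source. A sketch with placeholder names;
# the source argument is any already-configured source instance:
async def iterate_source(source):
    # First entry: open the source itself (connections, file handles)
    async with source as opened_source:
        # Second entry: open a context for the actual reads / writes
        async with opened_source() as source_ctx:
            async for record in source_ctx.records():
                print(record.export())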
async def test_create_from_path(self):
    # Create a temp dir and write the op to ops.py
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Change directory into the tempdir
        with chdir(tmpdirname):
            # Write out the op to ops.py
            operation_file_path = pathlib.Path(tmpdirname, "ops.py")
            operation_file_path.write_text(OP_DEF_STRING)
            # We make the name the path relative to our cwd
            operation_qualname = "ops:echo_string"
            dataflow_file_path = pathlib.Path(tmpdirname, "dataflow.json")
            # $ dffml dataflow create \
            #     ops:echo_string get_single
            with io.StringIO() as dataflow:
                with contextlib.redirect_stdout(dataflow):
                    await CLI.cli(
                        "dataflow",
                        "create",
                        *[operation_qualname, "get_single"],
                        "-seed",
                        '["OutputString"]=get_single_spec',
                    )
                test_dataflow = DataFlow._fromdict(
                    **json.loads(dataflow.getvalue())
                )
            # Make sure the operation is in the dataflow
            self.assertIn(operation_qualname, test_dataflow.operations)
            # Run the dataflow
            async for ctx_str, results in run(
                test_dataflow,
                [
                    Input(
                        value="Irregular at magic school",
                        definition=test_dataflow.operations[
                            operation_qualname
                        ].inputs["input_string"],
                    )
                ],
            ):
                self.assertIn("OutputString", results)
                self.assertEqual(
                    results["OutputString"],
                    "Irregular at magic school",
                )
async def test_dataflow(self):
    server_addr = f"http://127.0.0.1:{self.server.port}"
    with mock.patch.object(
        stream_chat.imp, "CONTEXT", new=FakeStreamChatImpContext
    ):
        with tempfile.NamedTemporaryFile(suffix=".ini") as config_file:
            config_file.write(b"[secrets]\n")
            config_file.write(b"access_token = 123\n")
            config_file.write(f"botname = {BOT_NAME}\n".encode())
            config_file.write(f"api_url = {server_addr}\n".encode())
            config_file.write(f"stream_url = {server_addr}\n".encode())
            config_file.seek(0)
            bot_config = GitterChannelConfig(
                INISecret(filename=config_file.name)
            )
            dataflow = DataFlow(
                operations={x.op.name: x for x in OPERATIONS},
                implementations={x.op.name: x.imp for x in OPERATIONS},
                configs={x.op.name: bot_config for x in OPERATIONS},
            )
            dataflow.seed = [
                Input(
                    value=self.room_name,
                    definition=get_room_id.op.inputs["room_uri"],
                )
            ]
            async for ctx, result in run(dataflow):
                pass
            self.assertEqual(
                self.room.inbox,
                [
                    "Hey Hooman ฅ^•ﻌ•^ฅ",
                    "Gimme more details!!",
                    "Done!!",
                    "Salary: 70.00000000000001",
                    " Oops ,I didnt get that ᕙ(⇀‸↼‶)ᕗ ",
                ],
            )
def run_dataflow(self, _octx, *inputs):
    return run(DATAFLOW, *inputs)
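# The one-liner above reads like a test shim: it ignores the passed
# orchestrator context (_octx) and re-runs the module-level DATAFLOW via the
# top-level run(). A sketch of installing such a shim with unittest.mock,
# where TargetClass is a placeholder for whichever class normally owns
# run_dataflow:
from unittest import mock

with mock.patch.object(TargetClass, "run_dataflow", run_dataflow):
    ...  # exercise code that would normally call TargetClass.run_dataflow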