async def test_condition_does_not_run(self):
    ran = []

    @op(conditions=[CONDITION])
    async def condition_test(hi: str):
        ran.append(True)

    async with MemoryOrchestrator() as orchestrator:
        async with orchestrator(DataFlow(condition_test)) as octx:
            # No Input matching CONDITION is given, so the operation must
            # never run
            async for _ in octx.run(
                [
                    Input(
                        value=True,
                        definition=condition_test.op.inputs["hi"],
                    ),
                ]
            ):
                pass
    self.assertFalse(ran)
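# A hedged sketch, not from the original test file: the complementary case,
# where an Input matching CONDITION is also provided, so the operation should
# run. CONDITION is assumed to be a bool Definition, per the test above.
async def test_condition_runs(self):
    ran = []

    @op(conditions=[CONDITION])
    async def condition_test(hi: str):
        ran.append(True)

    async with MemoryOrchestrator() as orchestrator:
        async with orchestrator(DataFlow(condition_test)) as octx:
            async for _ in octx.run(
                [
                    # Truthy value for the condition definition
                    Input(value=True, definition=CONDITION),
                    Input(
                        value="hello",
                        definition=condition_test.op.inputs["hi"],
                    ),
                ]
            ):
                pass
    self.assertTrue(ran)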
async def test_create_from_path(self):
    # Create temp dir and write op to ops.py
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Change directory into the tempdir
        with chdir(tmpdirname):
            # Write out op to ops.py
            operation_file_path = pathlib.Path(tmpdirname, "ops.py")
            operation_file_path.write_text(OP_DEF_STRING)
            # We make the name the path relative to our cwd
            operation_qualname = "ops:echo_string"
            dataflow_file_path = pathlib.Path(tmpdirname, "dataflow.json")
            # $ dffml dataflow create \
            #       ops:echo_string get_single
            with io.StringIO() as dataflow:
                with contextlib.redirect_stdout(dataflow):
                    await CLI.cli(
                        "dataflow",
                        "create",
                        *[operation_qualname, "get_single"],
                        "-seed",
                        '["OutputString"]=get_single_spec',
                    )
                test_dataflow = DataFlow._fromdict(
                    **json.loads(dataflow.getvalue())
                )
            # Make sure the operation is in the dataflow
            self.assertIn(operation_qualname, test_dataflow.operations)
            # Run the dataflow
            async for ctx_str, results in run(
                test_dataflow,
                [
                    Input(
                        value="Irregular at magic school",
                        definition=test_dataflow.operations[
                            operation_qualname
                        ].inputs["input_string"],
                    )
                ],
            ):
                self.assertIn("OutputString", results)
                self.assertEqual(
                    results["OutputString"],
                    "Irregular at magic school",
                )
async def test_run(self):
    # Apply the op decorator (with default config) to the plain function
    op()(convert_to_gif)
    dataflow = DataFlow.auto(convert_to_gif, GetSingle)
    dataflow.seed.append(
        Input(
            value=[convert_to_gif.op.outputs["result"].name],
            definition=GetSingle.op.inputs["spec"],
        )
    )
    dataflow.implementations[convert_to_gif.op.name] = convert_to_gif.imp
    # Read in the test video
    input_file_path = self.parent_path / "input.mp4"
    with open(input_file_path, "rb") as f:
        input_file = f.read()
    test_inputs = {
        "Test": [
            Input(
                value=input_file,
                definition=convert_to_gif.op.inputs["input_file"],
            ),
            Input(
                value=240,
                definition=convert_to_gif.op.inputs["resolution"],
            ),
        ]
    }
    async with MemoryOrchestrator.withconfig({}) as orchestrator:
        async with orchestrator(dataflow) as octx:
            async for ctx, results in octx.run(test_inputs):
                idx = "examples.ffmpeg.operations:convert_to_gif.outputs.result"
                self.assertIn(idx, results)
                results = results[idx]
                self.assertIn("output_file", results)
                output = results["output_file"]
                self.assertGreater(len(output), 100000)
async def make_dataflow(ops, operations, inputs):
    # Create temp dir and write op to ops.py
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Change directory into the tempdir
        with chdir(tmpdirname):
            # Write out op to ops.py
            pathlib.Path(tmpdirname, "ops.py").write_text(ops)
            # Make the module importable and (re)load it
            sys.path.insert(0, tmpdirname)
            module = importlib.import_module("ops")
            importlib.reload(module)
            sys.path.pop(0)
            # $ dffml dataflow create $operations -inputs $inputs
            with io.StringIO() as dataflow:
                with contextlib.redirect_stdout(dataflow):
                    await CLI.cli(
                        "dataflow", "create", *operations, "-inputs", *inputs,
                    )
                yield DataFlow._fromdict(**json.loads(dataflow.getvalue()))
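# A hedged usage sketch, not from the original file: make_dataflow is an async
# generator, so it is consumed with async for, which keeps the temporary
# directory alive while the yielded DataFlow is in use. OP_DEF_STRING and the
# operation names mirror test_create_from_path above; the "-inputs" argument
# list is a placeholder, not verified dffml CLI syntax.
async def example_make_dataflow():
    async for dataflow in make_dataflow(
        OP_DEF_STRING,
        ["ops:echo_string", "get_single"],
        [],  # placeholder: supply real "-inputs" flow strings here
    ):
        print(dataflow.export())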
from dffml import op, DataFlow, Input, GetSingle
from dffml_feature_git.feature.operations import clone_git_repo

from .npm_audit import run_npm_audit
from .check import check_javascript
from ..types import SA_RESULTS

DATAFLOW = DataFlow.auto(run_npm_audit, GetSingle)
DATAFLOW.seed.append(
    Input(
        value=[run_npm_audit.op.outputs["report"].name],
        definition=GetSingle.op.inputs["spec"],
    )
)


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={"result": SA_RESULTS},
    conditions=[check_javascript.op.outputs["javascript"]],
)
async def analyze_javascript(self, repo):
    """
    Run JS static analysis
    """
    async with self.octx.parent(DATAFLOW) as octx:
        async for _, results in octx.run(
            [
from dffml import DataFlow, Input, GetSingle
from dffml_feature_git.feature.operations import clone_git_repo, cleanup_git_repo

# Import the per language check and analyze operations we wrote
from .python.check import check_python
from .python.analyze import analyze_python
from .javascript.check import check_javascript
from .javascript.analyze import analyze_javascript
from .rust.check import check_rust
from .rust.analyze import analyze_rust

# Import static analysis result definition
from .types import SA_RESULTS

# Link inputs and outputs together according to their definitions
DATAFLOW = DataFlow.auto(
    clone_git_repo,
    check_python,
    analyze_python,
    check_javascript,
    analyze_javascript,
    check_rust,
    analyze_rust,
    cleanup_git_repo,
    GetSingle,
)
DATAFLOW.seed.append(
    Input(
        value=[SA_RESULTS.name],
        definition=GetSingle.op.inputs["spec"],
    )
)

# Allow for directory to be provided by user instead of the result of cloning a
# Git repo to a temporary directory on disk
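# A hedged usage sketch, not part of the original module: run the combined
# dataflow against a repository URL. The "URL" input name on clone_git_repo is
# an assumption about dffml_feature_git's operation definitions.
async def example_scan(url: str):
    from dffml import run  # sketch-local import

    async for _ctx, results in run(
        DATAFLOW,
        [Input(value=url, definition=clone_git_repo.op.inputs["URL"])],
    ):
        # GetSingle keys results by definition name
        return results.get(SA_RESULTS.name)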
import pathlib

from dffml import op, DataFlow, Input, Definition, GetSingle
from dffml_feature_git.feature.operations import clone_git_repo


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={
        "result": Definition(name="has_package_cargo_result", primitive="bool")
    },
)
def has_package_cargo(repo: clone_git_repo.op.outputs["repo"].spec):
    # A Rust project will have a Cargo.toml in its top level directory; check
    # both capitalizations
    return {
        "result": pathlib.Path(repo.directory, "cargo.toml").is_file()
        or pathlib.Path(repo.directory, "Cargo.toml").is_file()
    }


DATAFLOW = DataFlow.auto(has_package_cargo, GetSingle)
DATAFLOW.seed.append(
    Input(
        value=[has_package_cargo.op.outputs["result"].name],
        definition=GetSingle.op.inputs["spec"],
    )
)


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={"rust": Definition(name="repo_is_rust", primitive="string")},
)
async def check_rust(self, repo):
    async with self.octx.parent(DATAFLOW) as octx:
from dffml import DataFlow, Input, GetSingle

# Import all the operations we wrote
from .python.bandit import run_bandit
from .python.pypi import pypi_latest_package_version
from .python.pypi import pypi_package_json
from .python.pypi import pypi_package_url
from .python.pypi import pypi_package_contents
from .python.pypi import cleanup_pypi_package
from .python.safety import safety_check

# Link inputs and outputs together according to their definitions
DATAFLOW = DataFlow.auto(
    pypi_package_json,
    pypi_latest_package_version,
    pypi_package_url,
    pypi_package_contents,
    cleanup_pypi_package,
    safety_check,
    run_bandit,
    GetSingle,
)

# Seed inputs are added to each executing context. The following Input tells
# the GetSingle output operation that we want the output of the network to
# include data matching the "issues" output of the safety_check operation, and
# the "report" output of the run_bandit operation, for each context.
DATAFLOW.seed.append(
    Input(
        value=[
            safety_check.op.outputs["issues"].name,
            run_bandit.op.outputs["report"].name,
        ],
        definition=GetSingle.op.inputs["spec"],
    )
)
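# A hedged usage sketch, not in the original module: run the dataflow for one
# PyPI package. The "package" input name on pypi_package_json is an assumption
# based on that operation's role as the entry point of this flow.
async def example_shouldi(package_name: str):
    from dffml import run  # sketch-local import

    async for _ctx, results in run(
        DATAFLOW,
        [
            Input(
                value=package_name,
                definition=pypi_package_json.op.inputs["package"],
            )
        ],
    ):
        # Keys are the definition names requested via the seed Input above
        return results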
from dffml import op, DataFlow, Input, GetSingle
from dffml_feature_git.feature.operations import clone_git_repo

from .cargo_audit import run_cargo_audit
from .check import check_rust
from ..types import SA_RESULTS

DATAFLOW = DataFlow.auto(run_cargo_audit, GetSingle)
DATAFLOW.seed.append(
    Input(
        value=[run_cargo_audit.op.outputs["report"].name],
        definition=GetSingle.op.inputs["spec"],
    )
)


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={"result": SA_RESULTS},
    conditions=[check_rust.op.outputs["rust"]],
)
async def analyze_rust(self, repo):
    """
    Run Rust static analysis
    """
    async with self.octx.parent(DATAFLOW) as octx:
        async for _, results in octx.run(
            [
                Input(
from dffml import DataFlow, Input, GetSingle

from .operations import convert_to_gif

DATAFLOW = DataFlow.auto(convert_to_gif, GetSingle)
DATAFLOW.seed = [
    Input(
        value=[convert_to_gif.op.outputs["output_file"].name],
        definition=GetSingle.op.inputs["spec"],
    ),
    Input(value=480, definition=convert_to_gif.op.inputs["resolution"]),
]
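# A hedged usage sketch, not part of the original module: feed DATAFLOW the
# bytes of an mp4 file. The input and output names come from the test_run test
# above; the seed already supplies the 480 pixel resolution.
async def example_convert(mp4_path: str):
    import pathlib  # sketch-local imports

    from dffml import run

    async for _ctx, results in run(
        DATAFLOW,
        [
            Input(
                value=pathlib.Path(mp4_path).read_bytes(),
                definition=convert_to_gif.op.inputs["input_file"],
            )
        ],
    ):
        return results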
import pathlib

from dffml import op, DataFlow, Input, Definition, GetSingle
from dffml_feature_git.feature.operations import clone_git_repo


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={
        "result": Definition(name="has_setup_py_result", primitive="bool")
    },
)
def has_setup_py(repo: clone_git_repo.op.outputs["repo"].spec):
    # A Python project will have a setup.py in its top level directory
    return {"result": pathlib.Path(repo.directory, "setup.py").is_file()}


DATAFLOW_ID_PYTHON = DataFlow.auto(has_setup_py, GetSingle)
DATAFLOW_ID_PYTHON.seed.append(
    Input(
        value=[has_setup_py.op.outputs["result"].name],
        definition=GetSingle.op.inputs["spec"],
    )
)


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={"python": Definition(name="repo_is_python", primitive="string")},
)
async def check_python(self, repo):
    async with self.octx.parent(DATAFLOW_ID_PYTHON) as octx:
OBJ = Definition(name="obj", primitive="mapping") LOCKED_OBJ = Definition(name="locked_obj", primitive="mapping", lock=True) SLEEP_TIME = Definition(name="sleep_time", primitive="int") INTEGER = Definition(name="integer", primitive="int") @op(inputs={"obj": OBJ, "sleep_for": SLEEP_TIME, "i": INTEGER}) async def run_me(obj: dict, sleep_for: int, i: int) -> None: obj["i"] = i await asyncio.sleep(sleep_for) print(f"set i = {i}, got i = {obj['i']}") print("Running dataflow without locked object") for ctx, result in run( DataFlow(run_me), [ Input(value={}, definition=OBJ), Input(value=0.1, definition=SLEEP_TIME), Input(value=0.2, definition=SLEEP_TIME), Input(value=1, definition=INTEGER), Input(value=2, definition=INTEGER), ], ): pass print("Running dataflow with locked object") run_me.op = run_me.op._replace(inputs={ "obj": LOCKED_OBJ, "sleep_for": SLEEP_TIME, "i": INTEGER
import pathlib

from dffml import op, DataFlow, Input, Definition, GetSingle
from dffml_feature_git.feature.operations import clone_git_repo


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={
        "result": Definition(name="has_package_json_result", primitive="bool")
    },
)
def has_package_json(repo: clone_git_repo.op.outputs["repo"].spec):
    # A JavaScript (Node.js) project will have a package.json in its top
    # level directory
    return {"result": pathlib.Path(repo.directory, "package.json").is_file()}


DATAFLOW = DataFlow.auto(has_package_json, GetSingle)
DATAFLOW.seed.append(
    Input(
        value=[has_package_json.op.outputs["result"].name],
        definition=GetSingle.op.inputs["spec"],
    )
)


@op(
    inputs={"repo": clone_git_repo.op.outputs["repo"]},
    outputs={
        "javascript": Definition(name="repo_is_javascript", primitive="string")
    },
)