async def test_run(self):
    calc_strings_check = {"add 40 and 2": 42, "multiply 42 and 10": 420}

    # Create an orchestrator configured with our operation implementations
    async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:
        async with orchestrator() as octx:
            for to_calc in calc_strings_check.keys():
                # Seed each context (keyed by the string to calculate) with
                # the line to parse and a spec telling the GetSingle output
                # operation to select the sum as the result
                await octx.ictx.sadd(
                    to_calc,
                    Input(
                        value=to_calc,
                        definition=calc_parse_line.op.inputs["line"],
                    ),
                    Input(
                        value=[calc_add.op.outputs["sum"].name],
                        definition=GetSingle.op.inputs["spec"],
                    ),
                )
            # Each iteration yields a context and its results once all
            # inputs for that context have been exhausted
            async for ctx, results in octx.run_operations():
                ctx_str = (await ctx.handle()).as_string()
                self.assertEqual(
                    calc_strings_check[ctx_str],
                    results[GetSingle.op.name][
                        calc_add.op.outputs["sum"].name
                    ],
                )
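# --- Illustrative sketch (not the module under test) ---
# For context, the operations the test above wires together might look
# roughly like the following minimal sketch. The Definition names and the
# parsing logic here are assumptions for illustration; the real test
# imports calc_parse_line and calc_add from the example module it
# exercises.
from dffml.df.base import op
from dffml.df.types import Definition

calc_string = Definition(name="calc_string", primitive="str")
numbers = Definition(name="numbers", primitive="List[int]")
result = Definition(name="result", primitive="int")

@op(inputs={"line": calc_string}, outputs={"numbers": numbers})
async def calc_parse_line(line: str):
    # Pull the integers out of a string such as "add 40 and 2"
    return {
        "numbers": [int(word) for word in line.split() if word.isdigit()]
    }

@op(inputs={"numbers": numbers}, outputs={"sum": result})
async def calc_add(numbers: list):
    return {"sum": sum(numbers)}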
async def test_run(self):
    passwords = [str(random.random()) for _ in range(0, 20)]

    # Orchestrate the running of these operations
    async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:
        definitions = Operation.definitions(*OPERATIONS)
        passwords = [
            Input(
                value=password,
                definition=definitions["UnhashedPassword"],
                parents=None,
            )
            for password in passwords
        ]
        output_spec = Input(
            value=["ScryptPassword"],
            definition=definitions["get_single_spec"],
            parents=None,
        )
        async with orchestrator() as octx:
            # Add our inputs to the input network with the context being
            # the password to hash
            for password in passwords:
                await octx.ictx.add(
                    MemoryInputSet(
                        MemoryInputSetConfig(
                            ctx=StringInputSetContext(password.value),
                            inputs=[password, output_spec],
                        )
                    )
                )
            try:
                async for _ctx, results in octx.run_operations(strict=True):
                    self.assertTrue(results)
            except AttributeError as error:
                # hashlib.scrypt is only available when Python is built
                # against OpenSSL 1.1+, skip the test if it's missing
                if "module 'hashlib' has no attribute 'scrypt'" in str(error):
                    return
                raise
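# --- Illustrative sketch (not the module under test) ---
# The hashing operation this test drives might be shaped like the
# following. The Definition names mirror the strings used above, but the
# scrypt parameters and salt handling are assumptions for illustration.
import hashlib
from dffml.df.base import op
from dffml.df.types import Definition

unhashed_password = Definition(name="UnhashedPassword", primitive="str")
scrypt_password = Definition(name="ScryptPassword", primitive="str")

@op(
    inputs={"password": unhashed_password},
    outputs={"password": scrypt_password},
)
async def hash_password(password: str):
    # hashlib.scrypt only exists when Python is linked against
    # OpenSSL 1.1+, which is why the test above tolerates AttributeError
    salt = b"static-salt-for-illustration"  # real code should use os.urandom
    return {
        "password": hashlib.scrypt(
            password.encode(), salt=salt, n=2 ** 14, r=8, p=1
        ).hex()
    }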
async def multicomm_dataflow(self, config, request):
    # Seed the network with inputs given by caller
    # TODO(p0,security) allowlist of valid definitions to seed (set
    # Input.origin to something other than seed)
    inputs = []
    # If data was sent, add those inputs
    if request.method == "POST":
        # Accept a list of input data
        # TODO validate that the input data is a dict mapping each context
        # to a list of inputs, where each item has definition and value
        # properties
        for ctx, client_inputs in (await request.json()).items():
            for input_data in client_inputs:
                if (
                    input_data["definition"]
                    not in config.dataflow.definitions
                ):
                    return web.json_response(
                        {
                            "error": f"Missing definition for {input_data['definition']} in dataflow"
                        },
                        status=HTTPStatus.NOT_FOUND,
                    )
            inputs.append(
                MemoryInputSet(
                    MemoryInputSetConfig(
                        ctx=StringInputSetContext(ctx),
                        inputs=[
                            Input(
                                value=input_data["value"],
                                definition=config.dataflow.definitions[
                                    input_data["definition"]
                                ],
                            )
                            for input_data in client_inputs
                        ],
                    )
                )
            )
    # Run the dataflow in an orchestrator
    # TODO(dfass) Create the orchestrator on startup of the HTTP API itself
    async with MemoryOrchestrator.basic_config() as orchestrator:
        # TODO(dfass) Create octx on dataflow registration
        async with orchestrator(config.dataflow) as octx:
            results = {
                str(ctx): result async for ctx, result in octx.run(*inputs)
            }
            # TODO Implement input and presentation stages?
            """
            if config.presentation == "blob":
                return web.Response(body=results)
            elif config.presentation == "text":
                return web.Response(text=results)
            else:
            """
            return web.json_response(results)
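# --- Illustrative usage (hypothetical route and definition name) ---
# A POST body this handler accepts maps each context string to a list of
# {value, definition} objects. A client call might look like the
# following sketch; the URL path and the "URL" definition name are
# assumptions, not part of the handler above.
import aiohttp

async def call_dataflow():
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:8080/my/dataflow",  # hypothetical registered path
            json={
                "my_context": [
                    {
                        "value": "https://github.com/intel/dffml",
                        "definition": "URL",
                    }
                ]
            },
        ) as resp:
            # The response maps each context string to that context's results
            return await resp.json()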
async def run(self):
    # Create an Orchestrator which will manage the running of our operations
    async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:
        # Create an orchestrator context, everything in DFFML follows this
        # one-two context entry pattern
        async with orchestrator() as octx:
            for package_name in self.packages:
                # For each package add a new input set to the network of
                # inputs (ictx). Operations run under a context, the context
                # here is the package_name to evaluate (the first argument).
                # The next arguments are all the inputs we're seeding the
                # network with for that context. We give the package name
                # because pypi_latest_package_version needs it to find the
                # version, which safety will then use. We also give an input
                # to the output operation GetSingle, which takes a list of
                # data type definitions we want to select as our results.
                await octx.ictx.sadd(
                    package_name,
                    Input(
                        value=package_name,
                        definition=pypi_package_json.op.inputs["package"],
                    ),
                    Input(
                        value=[
                            safety_check.op.outputs["issues"].name,
                            run_bandit.op.outputs["report"].name,
                        ],
                        definition=GetSingle.op.inputs["spec"],
                    ),
                )
            # Run all the operations. Each iteration of this loop happens
            # when all inputs are exhausted for a context; the output
            # operations are then run and their results are yielded
            async for ctx, results in octx.run_operations():
                # The context for this data flow was the package name
                package_name = (await ctx.handle()).as_string()
                # Get the results of the GetSingle output operation
                results = results[GetSingle.op.name]
                # Flag the package if safety found any issues, or if bandit
                # reported more than five high confidence, high severity
                # findings
                any_issues = list(results.values())
                if (
                    any_issues[0] > 0
                    or any_issues[1]["CONFIDENCE.HIGH_AND_SEVERITY.HIGH"] > 5
                ):
                    print(f"Do not install {package_name}! {results!r}")
                else:
                    print(f"{package_name} is okay to install")
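# --- Illustrative sketch ---
# The OPIMPS list handed to MemoryOrchestrator.basic_config above is
# assumed to hold an implementation for every operation the dataflow
# references. One way such a list might be collected is to scan the module
# that imported the operations, though the real example may build the list
# explicitly instead.
import sys
from dffml.df.base import opimp_in

OPIMPS = opimp_in(sys.modules[__name__])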
async def test_run(self):
    linker = Linker()
    exported = linker.export(*OPERATIONS)
    definitions, operations, _outputs = linker.resolve(exported)

    # Instantiate inputs: prefer local checkouts, fall back to cloning
    repos = glob.glob(
        os.path.join(
            os.path.expanduser("~"),
            "Documents",
            "python",
            "testrepos",
            "*",
        )
    )
    if not repos:
        repos = glob.glob(
            os.path.join(
                os.path.expanduser("~"), "Documents", "python", "dffml"
            )
        )
    if not repos:
        repos = [
            "https://github.com/intel/dffml",
            "https://github.com/pdxjohnny/dffml",
        ]
    repos = repos[:1]

    urls = [
        Input(value=URL, definition=definitions["URL"], parents=None)
        for URL in repos
    ]
    no_git_branch_given = Input(
        value=True,
        definition=definitions["no_git_branch_given"],
        parents=None,
    )
    date_spec = Input(
        value=datetime.now().strftime(TIME_FORMAT_MINTUE_RESOLUTION),
        definition=definitions["quarter_start_date"],
        parents=None,
    )
    quarters = [
        Input(value=i, definition=definitions["quarter"], parents=None)
        for i in range(0, 10)
    ]
    group_by_spec = Input(
        value={
            "cloc": {
                "group": "quarter",
                "by": "language_to_comment_ratio",
                "fill": 0,
            },
            "authors": {
                "group": "quarter",
                "by": "author_count",
                "fill": 0,
            },
            "work": {"group": "quarter", "by": "work_spread", "fill": 0},
            "release": {
                "group": "quarter",
                "by": "release_within_period",
                "fill": False,
            },
            "commits": {
                "group": "quarter",
                "by": "commit_count",
                "fill": 0,
            },
        },
        definition=definitions["group_by_spec"],
        parents=None,
    )

    # Orchestrate the running of these operations
    async with MemoryOrchestrator.basic_config(*OPIMPS) as orchestrator:
        async with orchestrator() as octx:
            # Add our inputs to the input network with the context being
            # the URL
            for url in urls:
                await octx.ictx.sadd(
                    url.value,
                    url,
                    no_git_branch_given,
                    date_spec,
                    group_by_spec,
                    *quarters,
                )
            async for ctx, results in octx.run_operations():
                self.assertTrue(results)
async def multicomm_dataflow(self, config, request):
    # Seed the network with inputs given by caller
    # TODO(p0,security) allowlist of valid definitions to seed (set
    # Input.origin to something other than seed)
    inputs = []
    # If data was sent, add those inputs
    if request.method == "POST":
        # Accept a list of input data according to config.input_mode
        if config.input_mode == "default":
            # TODO validate that the input data is a dict mapping each
            # context to a list of inputs, where each item has definition
            # and value properties
            for ctx, client_inputs in (await request.json()).items():
                for input_data in client_inputs:
                    if (
                        input_data["definition"]
                        not in config.dataflow.definitions
                    ):
                        return web.json_response(
                            {
                                "error": f"Missing definition for {input_data['definition']} in dataflow"
                            },
                            status=HTTPStatus.NOT_FOUND,
                        )
                inputs.append(
                    MemoryInputSet(
                        MemoryInputSetConfig(
                            ctx=StringInputSetContext(ctx),
                            inputs=[
                                Input(
                                    value=input_data["value"],
                                    definition=config.dataflow.definitions[
                                        input_data["definition"]
                                    ],
                                )
                                for input_data in client_inputs
                            ],
                        )
                    )
                )
        elif ":" in config.input_mode:
            preprocess_mode, input_def = config.input_mode.split(":")
            if input_def not in config.dataflow.definitions:
                return web.json_response(
                    {
                        "error": f"Missing definition for {input_def} in dataflow"
                    },
                    status=HTTPStatus.NOT_FOUND,
                )
            if preprocess_mode == "json":
                value = await request.json()
            elif preprocess_mode == "str":
                value = await request.text()
            elif preprocess_mode == "bytes":
                value = await request.read()
            elif preprocess_mode == "stream":
                value = request.content
            else:
                return web.json_response(
                    {
                        "error": f"preprocess tag must be one of {IO_MODES}, got {preprocess_mode}"
                    },
                    status=HTTPStatus.NOT_FOUND,
                )
            inputs.append(
                MemoryInputSet(
                    MemoryInputSetConfig(
                        ctx=StringInputSetContext("post_input"),
                        inputs=[
                            Input(
                                value=value,
                                definition=config.dataflow.definitions[
                                    input_def
                                ],
                            )
                        ],
                    )
                )
            )
        else:
            raise NotImplementedError(
                "Input modes other than default and preprocess:definition_name not yet implemented"
            )
    # Run the dataflow in an orchestrator
    # TODO(dfass) Create the orchestrator on startup of the HTTP API itself
    async with MemoryOrchestrator.basic_config() as orchestrator:
        # TODO(dfass) Create octx on dataflow registration
        async with orchestrator(config.dataflow) as octx:
            results = {
                str(ctx): result async for ctx, result in octx.run(*inputs)
            }
            if config.output_mode == "json":
                return web.json_response(results)
            # content_info is a List[str]: [content_type, output_keys] in
            # the bytes case, and just [output_keys] for text and stream
            postprocess_mode, *content_info = config.output_mode.split(":")
            if postprocess_mode == "stream":
                # stream:text/plain:get_single.beef
                raise NotImplementedError(
                    "stream output mode not yet implemented"
                )
            elif postprocess_mode == "bytes":
                content_type, output_keys = content_info
                output_data = traverse_get(results, *output_keys.split("."))
                return web.Response(body=output_data)
            elif postprocess_mode == "text":
                output_data = traverse_get(
                    results, *content_info[0].split(".")
                )
                return web.Response(text=output_data)
            else:
                return web.json_response(
                    {"error": f"output mode {config.output_mode} not valid"},
                    status=HTTPStatus.NOT_FOUND,
                )
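# --- Illustrative configurations (names are hypothetical) ---
# Configurations exercising the branches above. SimpleNamespace stands in
# for whatever config object the server actually registers, and
# my_dataflow is a placeholder for a real dffml DataFlow instance; the
# output_mode traversal path is likewise an assumed example.
from types import SimpleNamespace

my_dataflow = None  # stand-in for a real dffml DataFlow instance

config_default = SimpleNamespace(
    input_mode="default",  # POST body: {ctx: [{"value": ..., "definition": ...}]}
    output_mode="json",    # respond with the full results dict
    dataflow=my_dataflow,
)
config_raw = SimpleNamespace(
    input_mode="bytes:image",  # feed the raw POST body to the "image" definition
    # Pull a single value out of the results dict via traverse_get
    output_mode="text:post_input.get_single.summary",
    dataflow=my_dataflow,
)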