Example #1
 async def run(self):
     # Make a corresponding list of path objects in case any of the targets
     # we are given are paths on disk rather than Git repo URLs
     paths = [
         pathlib.Path(target_name).expanduser().resolve()
         for target_name in self.targets
     ]
     # Run all the operations. Each iteration of this loop happens
     # when all inputs are exhausted for a context; the output
     # operations are then run and their results are yielded
     async for target_name, results in run(
         DATAFLOW,
         {
             # For each target, add a new input set to the input network.
             # The context under which operations execute is the target
             # name being evaluated. Contexts ensure that data pertaining
             # to target A doesn't mingle with data pertaining to target B
             target_name: [
                 # The input is the target name (a URL) or, for targets
                 # that are directories on disk, a pre-cloned repo object.
                 Input(
                     value={
                         "URL": "file://" + str(path),
                         "directory": str(path),
                     },
                     definition=clone_git_repo.op.outputs["repo"],
                 ) if path.is_dir() else Input(
                     value=target_name,
                     definition=clone_git_repo.op.inputs["URL"],
                 )
             ]
             for target_name, path in zip(self.targets, paths)
         },
     ):
         print(results)
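
The example seeds each context either with a URL for clone_git_repo to clone, or, for on-disk targets, with a value that already matches clone_git_repo's repo output so the clone step is skipped. A rough sketch of the operation interface this relies on (hypothetical names and placeholder body; the real definitions live in dffml-feature-git):

from dffml import Definition, op

URL = Definition(name="URL", primitive="string")
git_repository = Definition(name="git_repository", primitive="Dict[str, str]")

@op(inputs={"URL": URL}, outputs={"repo": git_repository})
async def clone_git_repo(url: str):
    # Placeholder for the real clone logic; the output shape is what
    # matters: a dict with "URL" and "directory" keys, mirroring the
    # Input built for on-disk targets above
    directory = "/tmp/checkout"
    return {"repo": {"URL": url, "directory": directory}}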
Example #2
 async def test_gen(self):
     operation_qualname = "ops:echo_strings"
     async with self.make_dataflow(
         ECHO_STRINGS,
         [operation_qualname, "get_multi"],
         ["ops:echo_strings.outputs.result,=get_multi_spec"],
     ) as dataflow:
         # Make sure the operation is in the dataflow
         self.assertIn(operation_qualname, dataflow.operations)
         # Definitions for shorthand access
         idef = dataflow.operations[operation_qualname].inputs[
             "input_string"]
         odef = dataflow.operations[operation_qualname].outputs["result"]
         # Run the dataflow
         async for ctx_str, results in run(
             dataflow,
             [Input(
                 value="Irregular at magic school",
                 definition=idef,
             )],
         ):
             self.assertIn(odef.name, results)
             self.assertListEqual(
                 results[odef.name],
                 [
                     f"Echo({i}): Irregular at magic school"
                     for i in range(0, 5)
                 ],
             )
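
The ops module this test loads is not shown. A plausible sketch, inferred from the expected results: an async-generator operation that yields five echoes, which get_multi then collects into a list (the bare @op usage and the yield shape are assumptions):

from dffml import op

@op
async def echo_strings(input_string: str) -> str:
    # Assumption: one result dict per yield; get_multi gathers every
    # value produced for the result definition into a single list
    for i in range(5):
        yield {"result": f"Echo({i}): {input_string}"}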
Example #3
 async def run(self):
     # Run all the operations. Each iteration of this loop happens
     # when all inputs are exhausted for a context; the output
     # operations are then run and their results are yielded
     async for package_name, results in run(
         DATAFLOW,
         {
             # For each package, add a new input set to the input network.
             # The context under which operations execute is the package
             # name being evaluated. Contexts ensure that data pertaining
             # to package A doesn't mingle with data pertaining to package B
             package_name: [
                 # The only input to the operations is the package name.
                 Input(
                     value=package_name,
                     definition=pypi_package_json.op.inputs["package"],
                 )
             ]
             for package_name in self.packages
         },
     ):
         # Grab the number of safety issues and the bandit report
         # from the results dict
         safety_issues = results[safety_check.op.outputs["issues"].name]
         bandit_report = results[run_bandit.op.outputs["report"].name]
         # Decide if those numbers mean we should stop ship or not
         if (safety_issues > 0
                 or bandit_report["CONFIDENCE.HIGH_AND_SEVERITY.HIGH"] > 5):
             print(f"Do not install {package_name}!")
             for definition_name, result in results.items():
                 print(f"    {definition_name}: {result}")
         else:
             print(f"{package_name} is okay to install")
Example #4
 async def run_dataflow(dataflow):
     async for ctx, results in run(
         dataflow,
         [
             Input(
                 value=[fail_and_retry.op.outputs["result"].name],
                 definition=GetSingle.op.inputs["spec"],
             ),
         ],
     ):
         yield results
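
The spec input tells GetSingle which definition names to extract from the input network once a context's inputs are exhausted; here it is the name of fail_and_retry's result definition. A hedged sketch of draining the wrapper above:

import asyncio

async def main(dataflow):
    # Collect every results dict the dataflow yields; with GetSingle in
    # play, each dict maps the requested definition names to values
    return [results async for results in run_dataflow(dataflow)]

# batches = asyncio.run(main(dataflow))  # dataflow is built elsewhere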
Example #5
 async def test_extract_zip_op(self):
     dataflow = create_dataflow(
         extract_zip_archive,
         {
             "input_file_path": self.test_file_pth,
             "output_directory_path": self.test_dir_pth,
         },
     )
     m_open = mock_open()
     with patch("io.open", m_open), patch("zipfile._EndRecData"), patch(
             "zipfile.ZipFile._RealGetContents"):
         async for _, _ in run(dataflow):
             m_open.assert_called_once_with(self.test_file_pth, "rb")
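
Examples #5 through #9 share one pattern: real file I/O is swapped for unittest.mock.mock_open and the zipfile/tarfile internals are patched out, so the archive operations run without touching disk. The mechanism in isolation, independent of dffml (read_header is a hypothetical stand-in):

from unittest.mock import mock_open, patch

def read_header(path):
    # Stand-in for code that opens a file, as the archive ops do
    with open(path, "rb") as stream:
        return stream.read()

m_open = mock_open(read_data=b"PK\x03\x04")
with patch("builtins.open", m_open):
    assert read_header("archive.zip") == b"PK\x03\x04"
m_open.assert_called_once_with("archive.zip", "rb")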
Example #6
 async def test_make_tar_archive_op(self):
     dataflow = create_dataflow(
         make_tar_archive,
         {
             "input_directory_path": self.test_dir_pth,
             "output_file_path": self.test_file_pth,
         },
     )
     m_open = mock_open()
     with patch("tarfile.bltn_open",
                m_open), patch("tarfile.TarFile.close"):
         async for _, _ in run(dataflow):
             m_open.assert_called_once_with(self.test_file_pth, "xb")
Example #7
 async def test_make_zip_op(self):
     dataflow = create_dataflow(
         make_zip_archive,
         {
             "input_directory_path": self.test_dir_pth,
             "output_file_path": self.test_file_pth,
         },
     )
     m_open = mock_open()
     with patch("io.open",
                m_open), patch("zipfile.ZipFile._write_end_record"):
         async for _, _ in run(dataflow):
             m_open.assert_called_once_with(self.test_file_pth, "w+b")
Example #8
 async def test_extract_tar_op(self):
     dataflow = create_dataflow(
         extract_tar_archive,
         {
             "input_file_path": self.test_file_pth,
             "output_directory_path": self.test_dir_pth,
         },
     )
     m_open = mock_open()
     with patch("builtins.open",
                m_open), patch("tarfile.TarFile.extractall"), patch(
                    "tarfile.TarInfo.fromtarfile", m_open):
         async for _, _ in run(dataflow):
             m_open.assert_any_call("test/path/to/tar_file.tar", "rb")
Example #9
 async def test_inflate_gz(self):
     dataflow = create_dataflow(
         gz_decompress,
         {
             "input_file_path": self.compressed_file_pth(".gz"),
             "output_file_path": self.uncomressed_file_pth,
         },
     )
     m_open = mock_open()
     with patch("builtins.open",
                m_open()), patch("gzip.open",
                                 m_open()), patch("shutil.copyfileobj"):
         async for _, _ in run(dataflow):
             m_open.assert_has_calls(self.get_inflation_mock_calls(".gz"))
Example #10
async def main():
    bot_config = GitterChannelConfig(INISecret(filename="configs.ini"))
    dataflow = DataFlow(
        operations={x.op.name: x
                    for x in OPERATIONS},
        implementations={x.op.name: x.imp
                         for x in OPERATIONS},
        configs={x.op.name: bot_config
                 for x in OPERATIONS},
    )
    room_name = "test_community1/community"
    dataflow.seed = [
        Input(value=room_name, definition=get_room_id.op.inputs["room_uri"])
    ]
    async for ctx, result in run(dataflow):
        pass
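
main() is a coroutine, so the module still needs an entry point to drive it; asyncio.run is the standard one:

import asyncio

if __name__ == "__main__":
    asyncio.run(main())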
Example #11
async def main():
    # Clear the file so we overwrite with new data
    repos_json_path.write_text("[]")
    # Create and enter our sources (__aenter__()) following the Double Context
    # Entry pattern (see tutorial page for more details)
    async with OrgsReposYAMLSource(
        directory=pathlib.Path(__file__).parent.joinpath(
            "orgs")) as input_source, SAPPortalReposJSONSource(
                filename=repos_json_path,
                readwrite=True,
            ) as output_source:
        # Second context entry
        async with input_source() as input_source_ctx, output_source(
        ) as output_source_ctx:
            # Run the dataflow
            async for ctx, results in dffml.run(
                dataflow,
                {
                    # We will run the dataflow on all input repos at the same
                    # time. The dataflow will run on each repo / record
                    # concurrently. We do this by creating a dictionary where
                    # each key is an InputSetContext, a RecordInputSetContext to
                    # be exact, since the context for each run is tied to the
                    # record / repo.
                    dffml.RecordInputSetContext(record): [
                        # Create a list of Inputs for each record's context. The
                        # only input we add at this time is the url of the repo.
                        dffml.Input(
                            value=record.key,
                            definition=dataflow.definitions["github.repo.url"],
                        )
                    ]
                    async for record in input_source_ctx.records()
                },
                strict=False,
            ):
                # Update the feature data of the record. The feature data is
                # what we are writing out to repos.json in the source we
                # implemented.
                ctx.record.evaluated(results)
                # Print results for debugging purposes
                print(ctx.record.export())
                # Save to output repos.json
                await output_source_ctx.update(ctx.record)
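
The Double Context Entry pattern used above, in miniature: enter the source itself first, then enter a context on it; only the context does record I/O. A hedged sketch that should work with any dffml Source instance:

async def read_all(source):
    # First entry: the source itself (opens files, connections, etc.)
    async with source as src:
        # Second entry: a context scoped to this batch of work
        async with src() as sctx:
            return [record async for record in sctx.records()]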
Example #12
 async def test_create_from_path(self):
     # Create temp dir and write op to ops.py
     with tempfile.TemporaryDirectory() as tmpdirname:
         # Change directory into the tempdir
         with chdir(tmpdirname):
             # Write out op to op.py
             operation_file_path = pathlib.Path(tmpdirname, "ops.py")
             operation_file_path.write_text(OP_DEF_STRING)
             # The operation name is its module path relative to our cwd
             operation_qualname = "ops:echo_string"
             dataflow_file_path = pathlib.Path(tmpdirname, "dataflow.json")
             # $ dffml dataflow create \
             #    ops:echo_string get_single
             with io.StringIO() as dataflow:
                 with contextlib.redirect_stdout(dataflow):
                     await CLI.cli(
                         "dataflow",
                         "create",
                         *[operation_qualname, "get_single"],
                         "-seed",
                         '["OutputString"]=get_single_spec',
                     )
                 test_dataflow = DataFlow._fromdict(
                     **json.loads(dataflow.getvalue()))
             # Make sure the operation is in the dataflow
             self.assertIn(operation_qualname, test_dataflow.operations)
             # Run the dataflow
             async for ctx_str, results in run(
                 test_dataflow,
                 [
                     Input(
                         value="Irregular at magic school",
                         definition=test_dataflow.operations[
                             operation_qualname].inputs["input_string"],
                     )
                 ],
             ):
                 self.assertIn("OutputString", results)
                 self.assertEqual(
                     results["OutputString"],
                     "Irregular at magic school",
                 )
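
OP_DEF_STRING is defined elsewhere in the test module. A plausible shape, inferred from the seeded ["OutputString"]=get_single_spec and the asserted result (hypothetical; the real constant may differ):

OP_DEF_STRING = '''
from dffml import Definition, op

@op(
    inputs={"input_string": Definition(name="InputString", primitive="string")},
    outputs={"output_string": Definition(name="OutputString", primitive="string")},
)
async def echo_string(input_string: str):
    return {"output_string": input_string}
'''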
Example #13
    async def test_dataflow(self):
        server_addr = f"http://127.0.0.1:{self.server.port}"
        with mock.patch.object(stream_chat.imp,
                               "CONTEXT",
                               new=FakeStreamChatImpContext):
            with tempfile.NamedTemporaryFile(suffix=".ini") as config_file:
                config_file.write(b"[secrets]\n")
                config_file.write(b"access_token = 123\n")
                config_file.write(f"botname = {BOT_NAME}\n".encode())
                config_file.write(f"api_url = {server_addr}\n".encode())
                config_file.write(f"stream_url = {server_addr}\n".encode())
                config_file.seek(0)

                bot_config = GitterChannelConfig(
                    INISecret(filename=config_file.name))
                dataflow = DataFlow(
                    operations={x.op.name: x
                                for x in OPERATIONS},
                    implementations={x.op.name: x.imp
                                     for x in OPERATIONS},
                    configs={x.op.name: bot_config
                             for x in OPERATIONS},
                )
                dataflow.seed = [
                    Input(
                        value=self.room_name,
                        definition=get_room_id.op.inputs["room_uri"],
                    )
                ]
                async for ctx, result in run(dataflow):
                    pass

            self.assertEqual(
                self.room.inbox,
                [
                    "Hey Hooman ฅ^•ﻌ•^ฅ",
                    "Gimme more details!!",
                    "Done!!",
                    "Salary: 70.00000000000001",
                    " Oops ,I didnt get that ᕙ(⇀‸↼‶)ᕗ ",
                ],
            )
Example #14
 def run_dataflow(self, _octx, *inputs):
     return run(DATAFLOW, *inputs)
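
The wrapper adapts run to an interface that receives an operation implementation context plus inputs and returns the async generator unawaited, so callers iterate it as usual. A hedged consumption sketch (inputs assumed to be dffml Input objects):

async def consume(obj, *inputs):
    # _octx is unused by the adapter above, so None is fine here
    async for ctx, results in obj.run_dataflow(None, *inputs):
        print(ctx, results)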