async def test_get_similarity(self):
    """Check that two nearly identical sentences score above 0.9.

    Runs ``get_similarity`` together with ``GetSingle`` and reads the
    operation's ``result`` output from each context's results.
    """
    first_text = "The end is the beginning , and the beginning is the end"
    second_text = "The end was the beginning , and the beginning was the end"

    flow = DataFlow.auto(get_similarity, GetSingle)
    op_inputs = get_similarity.op.inputs
    result_key = get_similarity.op.outputs["result"].name
    seed_inputs = [
        # Ask GetSingle to hand back the similarity result.
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=first_text, definition=op_inputs["text_1"]),
        Input(value=second_text, definition=op_inputs["text_2"]),
        Input(value="en_core_web_sm", definition=op_inputs["spacy_model"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        score = results[result_key]
        self.assertGreater(score, 0.9)
async def test_simple_imputer(self):
    """Run ``simple_imputer`` with the ``"mean"`` strategy on data with NaNs.

    The output is compared element-wise against the expected matrix and
    every element must match.
    """
    data_with_gaps = [[np.nan, 2], [6, np.nan], [7, 6]]
    expected = [[6.5, 2], [6, 4], [7, 6]]

    flow = DataFlow.auto(simple_imputer, GetSingle)
    op_inputs = simple_imputer.op.inputs
    result_key = simple_imputer.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=data_with_gaps, definition=op_inputs["data"]),
        Input(value=np.nan, definition=op_inputs["missing_values"]),
        Input(value="mean", definition=op_inputs["strategy"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        elementwise_match = results[result_key] == expected
        self.assertTrue(elementwise_match.all())
async def test_principal_component_analysis(self):
    """Check the output shape of ``principal_component_analysis``.

    A 10x10 dataset from ``make_classification`` is passed in with
    ``n_components=8``; the result is expected to have shape ``(10, 8)``.
    """
    input_data, _ = make_classification(
        n_samples=10,
        n_features=10,
        n_informative=8,
        n_redundant=2,
        random_state=7,
    )

    op_inputs = principal_component_analysis.op.inputs
    result_key = principal_component_analysis.op.outputs["result"].name

    async for ctx, results in MemoryOrchestrator.run(
        DataFlow.auto(principal_component_analysis, GetSingle),
        [
            Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
            Input(value=input_data, definition=op_inputs["data"]),
            Input(value=8, definition=op_inputs["n_components"]),
        ],
    ):
        # assertEqual reports the actual shape on failure, unlike
        # assertTrue on a pre-evaluated comparison.
        self.assertEqual(results[result_key].shape, (10, 8))
async def operation_db():
    """
    Set up the sqlite database ``examples.db`` and create the ``myTable``
    table used by the db operations.

    The dataflow results are consumed and discarded; the function returns
    nothing.
    """
    database = SqliteDatabase(SqliteDatabaseConfig(filename="examples.db"))

    create_table_op = db_query_create_table.op
    flow = DataFlow(
        operations={"db_query_create": create_table_op},
        configs={"db_query_create": DatabaseQueryConfig(database=database)},
        seed=[],
    )

    # Column name -> column type declaration for the new table.
    column_spec = {
        "key": "INTEGER NOT NULL PRIMARY KEY",
        "firstName": "text",
        "lastName": "text",
        "age": "int",
    }
    table_inputs = [
        Input(value="myTable", definition=create_table_op.inputs["table_name"]),
        Input(value=column_spec, definition=create_table_op.inputs["cols"]),
    ]

    # Drain the orchestrator so the flow runs to completion.
    async for _ctx, _result in MemoryOrchestrator.run(flow, table_inputs):
        pass
async def test_get_embedding(self):
    """Check that ``get_embedding`` returns one embedding per token.

    The number of embeddings must equal the number of whitespace-separated
    tokens in the sentence, and all embeddings must share a single shape.
    """
    input_sentence = "The end is the beginning , and the beginning is the end"

    op_inputs = get_embedding.op.inputs
    result_key = get_embedding.op.outputs["result"].name

    async for ctx, results in MemoryOrchestrator.run(
        DataFlow.auto(get_embedding, GetSingle),
        [
            Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
            Input(value=input_sentence, definition=op_inputs["text"]),
            Input(value="en_core_web_sm", definition=op_inputs["spacy_model"]),
        ],
    ):
        embeddings = results[result_key]
        self.assertEqual(len(input_sentence.split()), len(embeddings))
        # Compare every embedding rather than two randomly picked ones:
        # a random pair can select the same index and pass trivially, and
        # makes failures non-reproducible.
        distinct_shapes = {embedding.shape for embedding in embeddings}
        self.assertEqual(
            len(distinct_shapes),
            1,
            f"Embeddings have differing shapes: {distinct_shapes}",
        )
async def test_associatedefinition(self):
    """Check ``AssociateDefinition`` for two parent/child input pairs.

    Two ``output`` inputs are created, each with a different parent input
    (``feed`` and ``dead``). For a spec ``{parent_name: "output"}`` the
    expected result is ``{parent_name: <value of the child input>}``.
    """
    feed_def = Definition(name="feed", primitive="string")
    dead_def = Definition(name="dead", primitive="string")
    output = Definition(name="output", primitive="string")

    feed_input = Input(value="my favorite value", definition=feed_def)
    face_input = Input(value="face", definition=output, parents=[feed_input])

    dead_input = Input(value="my second favorite value", definition=dead_def)
    beef_input = Input(value="beef", definition=output, parents=[dead_input])

    test_result = {"feed": "face", "dead": "beef"}

    # Iterate key/value pairs directly instead of re-indexing the dict.
    for parent_name, expected_value in test_result.items():
        async for ctx, results in MemoryOrchestrator.run(
            DataFlow.auto(AssociateDefinition),
            [
                feed_input,
                face_input,
                dead_input,
                beef_input,
                Input(
                    value={parent_name: "output"},
                    definition=AssociateDefinition.op.inputs["spec"],
                ),
            ],
        ):
            self.assertEqual(results, {parent_name: expected_value})
async def test_calcHist(self):
    """Run ``calcHist`` on ``self.INPUT_ARRAY`` and check the result shape.

    Channels ``[0, 1]`` with ``histSize`` ``[32, 32]`` are requested, and
    the resulting histogram is expected to have shape ``(32, 32)``.
    """
    flow = DataFlow.auto(calcHist, GetSingle)
    op_inputs = calcHist.op.inputs
    result_key = calcHist.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=self.INPUT_ARRAY, definition=op_inputs["images"]),
        Input(value=None, definition=op_inputs["mask"]),
        Input(value=[0, 1], definition=op_inputs["channels"]),
        Input(value=[32, 32], definition=op_inputs["histSize"]),
        Input(value=[0, 256, 0, 256], definition=op_inputs["ranges"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        histogram = results[result_key]
        self.assertEqual(histogram.shape, (32, 32))
async def test_convert_color(self):
    """Round-trip ``self.INPUT_ARRAY`` through ``convert_color``.

    The operation is run with code ``"BGR2RGB"``; its output is converted
    back with ``cv2.COLOR_RGB2BGR`` and must equal the original array.
    """
    flow = DataFlow.auto(convert_color, GetSingle)
    result_key = convert_color.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=self.INPUT_ARRAY, definition=convert_color.op.inputs["src"]),
        Input(value="BGR2RGB", definition=convert_color.op.inputs["code"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        # Undo the conversion so the result can be compared to the input.
        restored = cv2.cvtColor(results[result_key], cv2.COLOR_RGB2BGR)
        self.assertEqual(
            restored.flatten().tolist(),
            self.INPUT_ARRAY.flatten().tolist(),
        )
async def test_pos_tagger(self):
    """Check that ``pos_tagger`` tags every token of the sentence.

    For each whitespace-separated word, the entry at the same index in the
    result must start with that word, and its tag must be one of the tags
    expected for this sentence.
    """
    input_sentence = "The end is the beginning , and the beginning is the end"
    words = input_sentence.split()
    # Built once instead of on every loop iteration.
    expected_tags = ("DT", "NN", "VBZ", "CC", ",")

    op_inputs = pos_tagger.op.inputs
    result_key = pos_tagger.op.outputs["result"].name

    async for ctx, results in MemoryOrchestrator.run(
        DataFlow.auto(pos_tagger, GetSingle),
        [
            Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
            Input(value=input_sentence, definition=op_inputs["text"]),
            Input(value="en_core_web_sm", definition=op_inputs["spacy_model"]),
        ],
    ):
        pos_tags = results[result_key]
        # Index into pos_tags (rather than zip) so a too-short result
        # still fails loudly instead of being silently truncated.
        for index, word in enumerate(words):
            tagged = pos_tags[index]
            self.assertEqual(tagged[0], word)
            self.assertIn(tagged[1], expected_tags)
async def records(self) -> AsyncIterator[Record]:
    """
    Yield records from ``self.sctx`` after running each one through the
    dataflow configured at ``self.parent.config.dataflow``.
    """
    async for record in self.sctx.records():
        async for ctx, result in MemoryOrchestrator.run(
            self.parent.config.dataflow,
            # One Input per configured feature: the value is read from the
            # record, and the definition is built from the feature's name
            # and the string form of its dtype.
            [
                Input(
                    value=record.feature(feature.name),
                    definition=Definition(name=feature.name, primitive=str(feature.dtype())),
                )
                for feature in self.parent.config.features
            ],
        ):
            # A falsy result leaves the record untouched.
            if result:
                record.evaluated(result)
            # NOTE(review): the record is yielded once per (ctx, result)
            # pair produced by the run; presumably there is exactly one
            # per record. Confirm against the orchestrator's behaviour.
            yield record
async def test_standard_scaler(self):
    """Check that ``standard_scaler`` maps the sample data to -1 / 1.

    The comparison works whether the operation returns nested lists or a
    numpy array.
    """
    # Local import so this block does not rely on a file-level alias.
    import numpy as np

    input_data = [[0, 0], [0, 0], [1, 1], [1, 1]]
    output_data = [[-1, -1], [-1, -1], [1, 1], [1, 1]]

    result_key = standard_scaler.op.outputs["result"].name

    async for ctx, results in MemoryOrchestrator.run(
        DataFlow.auto(standard_scaler, GetSingle),
        [
            Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
            Input(value=input_data, definition=standard_scaler.op.inputs["data"]),
        ],
    ):
        # assertTrue(result == expected) raises "truth value is ambiguous"
        # for an ndarray result. Normalising to nested lists avoids that
        # and lets assertEqual show a diff on failure.
        scaled = np.asarray(results[result_key]).tolist()
        self.assertEqual(scaled, output_data)
async def test_HuMoments(self):
    """Run ``HuMoments`` on ``self.INPUT_ARRAY``; the result must have shape ``(7,)``."""
    flow = DataFlow.auto(HuMoments, GetSingle)
    result_key = HuMoments.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=self.INPUT_ARRAY, definition=HuMoments.op.inputs["m"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        moments = results[result_key]
        self.assertEqual(moments.shape, (7,))
async def test_remove_whitespaces(self):
    """Check that ``remove_whitespaces`` strips padding from every cell.

    The output is compared element-wise with the expected values and
    every element must match.
    """
    padded = [[" ABC ", "XYD "], [" ABC", " XYD "]]
    expected = [["ABC", "XYD"], ["ABC", "XYD"]]

    flow = DataFlow.auto(remove_whitespaces, GetSingle)
    result_key = remove_whitespaces.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=padded, definition=remove_whitespaces.op.inputs["data"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        elementwise_match = results[result_key] == expected
        self.assertTrue(elementwise_match.all())
async def test_normalize(self):
    """Run ``normalize`` on ``self.INPUT_ARRAY``; the output shape must equal the input shape."""
    flow = DataFlow.auto(normalize, GetSingle)
    result_key = normalize.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=self.INPUT_ARRAY, definition=normalize.op.inputs["src"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        normalized = results[result_key]
        self.assertEqual(normalized.shape, self.INPUT_ARRAY.shape)
async def test_remove_stopwords(self):
    """Check the exact string ``remove_stopwords`` returns for a sample sentence."""
    sentence = "The end is the beginning, and the beginning is the end"
    expected = "end beginning , beginning end"

    flow = DataFlow.auto(remove_stopwords, GetSingle)
    result_key = remove_stopwords.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=sentence, definition=remove_stopwords.op.inputs["text"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        self.assertEqual(results[result_key], expected)
async def test_get_sentences(self):
    """Check that ``get_sentences`` returns two items for a two-sentence text."""
    text = "The end is the beginning. The beginning is the end."

    flow = DataFlow.auto(get_sentences, GetSingle)
    op_inputs = get_sentences.op.inputs
    result_key = get_sentences.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=text, definition=op_inputs["text"]),
        Input(value="en_core_web_sm", definition=op_inputs["spacy_model"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        found_sentences = results[result_key]
        self.assertEqual(len(found_sentences), 2)
async def test_singular_value_decomposition(self):
    """Check the output shape of ``singular_value_decomposition``.

    A 10x10 dataset from ``make_classification`` is passed in with
    ``n_components=8``, ``n_iter=1`` and ``random_state=7``; the result is
    expected to have shape ``(10, 8)``.
    """
    input_data, _ = make_classification(
        n_samples=10,
        n_features=10,
        n_informative=8,
        n_redundant=2,
        random_state=7,
    )

    op_inputs = singular_value_decomposition.op.inputs
    result_key = singular_value_decomposition.op.outputs["result"].name

    async for ctx, results in MemoryOrchestrator.run(
        DataFlow.auto(singular_value_decomposition, GetSingle),
        [
            Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
            Input(value=input_data, definition=op_inputs["data"]),
            Input(value=8, definition=op_inputs["n_components"]),
            Input(value=1, definition=op_inputs["n_iter"]),
            Input(value=7, definition=op_inputs["random_state"]),
        ],
    ):
        # assertEqual reports the actual shape on failure, unlike
        # assertTrue on a pre-evaluated comparison.
        self.assertEqual(results[result_key].shape, (10, 8))
async def test_flatten(self):
    """Check that ``flatten`` turns a 100x100x3 zero array into 30000 zeros."""
    height, width, channels = 100, 100, 3
    zeros_image = np.zeros((height, width, channels), dtype=np.uint8)
    expected = [0] * (height * width * channels)

    flow = DataFlow.auto(flatten, GetSingle)
    result_key = flatten.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=zeros_image, definition=flatten.op.inputs["array"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        flattened = results[result_key].tolist()
        self.assertEqual(flattened, expected)
async def test_ordinal_encoder(self):
    """Run ``ordinal_encoder`` on categorical rows and compare element-wise.

    Every element of the operation's output must equal the corresponding
    element of the expected matrix.
    """
    categorical_rows = [["x", "a"], ["x", "b"], ["y", "a"]]
    expected = [
        [1.0, 0.0, 1.0, 0.0],
        [1.0, 0.0, 0.0, 1.0],
        [0.0, 1.0, 1.0, 0.0],
    ]

    flow = DataFlow.auto(ordinal_encoder, GetSingle)
    result_key = ordinal_encoder.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=categorical_rows, definition=ordinal_encoder.op.inputs["data"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        elementwise_match = results[result_key] == expected
        self.assertTrue(elementwise_match.all())
async def test_run(self):
    """Run the dataflow over several package URLs and check PIE results.

    Each URL is used as its own context. For every context returned, the
    ``binary_is_PIE`` result must equal the mapping expected for that URL,
    and every URL must have produced a result by the end of the run.
    """
    # Package URL -> expected ``binary_is_PIE`` mapping.
    packages = {
        "http://pkg.freebsd.org/FreeBSD:13:amd64/latest/All/ImageMagick7-7.0.8.48.txz": {},
        "https://download.clearlinux.org/releases/10540/clear/x86_64/os/Packages/sudo-setuid-1.8.17p1-34.x86_64.rpm": {
            "./usr/bin/sudo": True
        },
        "https://rpmfind.net/linux/fedora/linux/updates/29/Everything/x86_64/Packages/g/gzip-1.9-9.fc29.x86_64.rpm": {},
        "https://archives.fedoraproject.org/pub/archive/fedora/linux/releases/20/Everything/x86_64/os/Packages/c/curl-7.32.0-3.fc20.x86_64.rpm": {
            "./usr/bin/curl": False
        },
    }

    # Tracks which package URLs have been seen in the results.
    found = dict.fromkeys(packages, False)

    async for ctx, results in MemoryOrchestrator.run(
        dataflow,
        {
            URL: [
                Input(value=URL, definition=URLToURLBytes.op.inputs["URL"]),
                Input(
                    value=["rpm_filename", "binary_is_PIE"],
                    definition=Associate.op.inputs["spec"],
                ),
            ]
            for URL in packages
        },
        strict=True,
    ):
        # The context handle's string form is the URL used as its key.
        package_url = (await ctx.handle()).as_string()
        with self.subTest(package_url=package_url):
            self.assertIn("binary_is_PIE", results)
            self.assertDictEqual(
                results["binary_is_PIE"], packages[package_url]
            )
        found[package_url] = True

    # The f prefix must sit outside the literal for {found} to be
    # interpolated into the failure message.
    self.assertTrue(
        all(found.values()),
        f"Not all packages were analyzed: {found}",
    )
async def test_lemmatizer(self):
    """Check that ``lemmatizer`` returns one lemma per whitespace-separated token."""
    sentence = "The end is the beginning , and the beginning is the end"

    flow = DataFlow.auto(lemmatizer, GetSingle)
    op_inputs = lemmatizer.op.inputs
    result_key = lemmatizer.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=sentence, definition=op_inputs["text"]),
        Input(value="en_core_web_sm", definition=op_inputs["spacy_model"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        lemmas = results[result_key]
        token_count = len(sentence.split())
        self.assertEqual(token_count, len(lemmas))
async def test_one_hot_encoder(self):
    """Run ``one_hot_encoder`` with explicit categories and compare element-wise.

    Every element of the operation's output must equal the corresponding
    element of the expected matrix.
    """
    categories = [["Male", "Female"], [1, 2, 3]]
    rows = [["Female", 1], ["Male", 3]]
    expected = [[0.0, 1.0, 1.0, 0.0, 0.0], [1.0, 0.0, 0.0, 0.0, 1.0]]

    flow = DataFlow.auto(one_hot_encoder, GetSingle)
    op_inputs = one_hot_encoder.op.inputs
    result_key = one_hot_encoder.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=rows, definition=op_inputs["data"]),
        Input(value=categories, definition=op_inputs["categories"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        elementwise_match = results[result_key] == expected
        self.assertTrue(elementwise_match.all())
async def main():
    """Train ``slr_model`` on four Years/Salary rows, then run the dataflow.

    The dataflow results are consumed and discarded.
    """
    # Training data: Years 0..3 paired with Salary 10, 20, 30, 40.
    training_rows = [
        {"Years": years, "Salary": 10 * (years + 1)} for years in range(4)
    ]
    await train(slr_model, *training_rows)

    # Drain the orchestrator so the flow runs to completion.
    async for _ctx, _results in MemoryOrchestrator.run(dataflow, {"inputs": []}):
        pass
async def test_resize(self):
    """Run ``resize`` with ``dsize`` ``[50, 50, 3]``; the result must have shape ``(50, 50, 3)``."""
    flow = DataFlow.auto(resize, GetSingle)
    result_key = resize.op.outputs["result"].name
    seed_inputs = [
        Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
        Input(value=self.INPUT_ARRAY, definition=resize.op.inputs["src"]),
        Input(value=[50, 50, 3], definition=resize.op.inputs["dsize"]),
    ]

    async for _ctx, results in MemoryOrchestrator.run(flow, seed_inputs):
        resized = results[result_key]
        self.assertEqual(resized.shape, (50, 50, 3))
async def test_tfidf_vectorizer(self):
    """Check the vectors and feature names returned by ``tfidf_vectorizer``.

    With ``get_feature_names=True`` the result is indexed as
    ``[vectors, features]``. ``vectors`` must be an ``np.ndarray`` whose
    first row has one entry per unique token, and ``features`` must be a
    list containing only tokens from the input text.
    """
    input_sentence = ["The end is the beginning. The beginning is the end."]
    # Lower-cased tokens with the periods removed, deduplicated.
    unique_tokens = set(input_sentence[0].lower().replace(".", "").split())

    op_inputs = tfidf_vectorizer.op.inputs
    result_key = tfidf_vectorizer.op.outputs["result"].name

    async for ctx, results in MemoryOrchestrator.run(
        DataFlow.auto(tfidf_vectorizer, GetSingle),
        [
            Input(value=[result_key], definition=GetSingle.op.inputs["spec"]),
            Input(value=input_sentence, definition=op_inputs["text"]),
            # Use tfidf_vectorizer's own ngram_range definition so the
            # value is addressed to the operation under test.
            Input(value=[1, 1], definition=op_inputs["ngram_range"]),
            Input(value=True, definition=op_inputs["get_feature_names"]),
        ],
    ):
        output = results[result_key]
        vectors = output[0]
        features = output[1]

        self.assertIsInstance(features, list)
        self.assertIsInstance(vectors, np.ndarray)
        self.assertEqual(len(vectors[0]), len(unique_tokens))
        # Every feature must be a token from the input; on failure the
        # unexpected features are shown.
        self.assertEqual(set(features) - unique_tokens, set())