def _test_submit_sab(self):
    """Build a bundle, then submit and cancel it twice through the service.

    The first submission passes only the bundle; the second also passes a
    ``JobConfig`` with an explicit job name and checks that the name is
    returned in the submission response.

    Each submitted job is cancelled, and the generated bundle and job
    config files are removed, in ``finally`` clauses so that a failing
    assertion does not leave jobs running or files on disk.
    """
    topo = Topology('SabTest', namespace='mynamespace')
    s = topo.source([1, 2])
    s.for_each(lambda x: None)
    bb = streamsx.topology.context.submit('BUNDLE', topo, {})
    try:
        self.assertIn('bundlePath', bb)
        self.assertIn('jobConfigPath', bb)

        sas = self.sc.get_streaming_analytics()

        # First submission: bundle only.
        sr = sas.submit_job(bundle=bb['bundlePath'])
        # The response carries the job identifier under 'id' or 'jobId'.
        job_id = sr.get('id', sr.get('jobId'))
        self.assertIsNotNone(job_id)
        try:
            self.assertIn('name', sr)
            self.assertIn('application', sr)
            self.assertEqual('mynamespace::SabTest', sr['application'])
        finally:
            sas.cancel_job(job_id=job_id)

        # Second submission: explicit job name via a JobConfig.
        jn = 'SABTEST:' + str(time.time())
        jc = streamsx.topology.context.JobConfig(job_name=jn)
        sr = sas.submit_job(bundle=bb['bundlePath'], job_config=jc)
        job_id = sr.get('id', sr.get('jobId'))
        self.assertIsNotNone(job_id)
        try:
            self.assertIn('application', sr)
            self.assertEqual('mynamespace::SabTest', sr['application'])
            self.assertIn('name', sr)
            self.assertEqual(jn, sr['name'])
        finally:
            sas.cancel_job(job_id=job_id)
    finally:
        # Only remove what the submission actually reported, so a missing
        # key does not mask the original assertion failure.
        for key in ('bundlePath', 'jobConfigPath'):
            if key in bb:
                os.remove(bb[key])
def test_schemas_bad(self):
    """Check that ``evstr.publish`` raises ``TypeError`` for each stream here.

    The streams cover the plain source stream plus mapped streams with the
    Binary, XML, BinaryMessageMeta, StringMessageMeta and ``tuple<int32 a>``
    schemas (the last one given both as a ``StreamSchema`` and as a string).
    """
    topo = Topology()
    pyObjStream = topo.source(['Hello', 'World!'])
    # The encoding must be the string 'utf-8'; the bare expression
    # ``utf - 8`` would raise NameError if the lambda were ever invoked.
    binStream = pyObjStream.map(
        func=lambda s: bytes("ABC", 'utf-8'),
        schema=CommonSchema.Binary)
    xmlStream = pyObjStream.map(schema=CommonSchema.XML)
    binMsgMetaStream = pyObjStream.map(
        func=lambda s: {'message': bytes(s, 'utf-8'), 'key': s},
        schema=MsgSchema.BinaryMessageMeta)
    strMsgMetaStream = pyObjStream.map(
        func=lambda s: {'message': s, 'key': s},
        schema=MsgSchema.StringMessageMeta)
    otherSplTupleStream1 = pyObjStream.map(
        schema=StreamSchema('tuple<int32 a>'))
    otherSplTupleStream2 = pyObjStream.map(schema='tuple<int32 a>')

    self.assertRaises(TypeError, evstr.publish, pyObjStream, "Topic")
    self.assertRaises(TypeError, evstr.publish, binStream, "Topic")
    self.assertRaises(TypeError, evstr.publish, xmlStream, "Topic")
    self.assertRaises(TypeError, evstr.publish, binMsgMetaStream, "Topic")
    self.assertRaises(TypeError, evstr.publish, strMsgMetaStream, "Topic")
    self.assertRaises(TypeError, evstr.publish, otherSplTupleStream1, "Topic")
    self.assertRaises(TypeError, evstr.publish, otherSplTupleStream2, "Topic")
def test_batch_aggregate(self):
    """Aggregate a 122-tuple sequence in batches of 10 and check the output.

    Twelve aggregate tuples are expected, each with a count of 10 and a
    maximum ``seq`` equal to the last value of its batch.
    """
    topo = Topology()
    sequence = topo.source(U.Sequence(iterations=122))
    batches = sequence.batch(size=10)

    agg = R.Aggregate.invoke(batches, ARSCHEMA)
    agg.acount = agg.count()
    agg.acount_all = agg.count_all()
    agg.amax = agg.max('seq')
    result = agg.stream

    tester = Tester(topo)
    # Mimic the aggregate processing: one entry per full batch of 10.
    expected = [
        {'acount': 10, 'acount_all': 10, 'amax': first + 10 - 1}
        for first in range(0, 120, 10)
    ]
    tester.contents(result, expected)
    tester.tuple_count(result, 12)
    tester.test(self.test_ctxtype, self.test_config)
def test_close_on_tuples(self):
    """Write six strings with ``tuples_per_file=3`` and expect two result tuples.

    Credentials are read from the JSON file named by the
    ``ANALYTICS_ENGINE`` environment variable.
    """
    with open(os.environ['ANALYTICS_ENGINE']) as creds_fp:
        credentials = json.load(creds_fp)

    topo = Topology('test_hdfs_uri')
    if self.hdfs_toolkit_location is not None:
        tk.add_toolkit(topo, self.hdfs_toolkit_location)

    lines = topo.source([
        'Hello World!', 'Hello', 'World',
        'Hello World!', 'Hello', 'World',
    ]).as_string()
    result = hdfs.write(
        lines,
        credentials=credentials,
        file='pytest/write_test%FILENUM.txt',
        tuples_per_file=3,
    )
    result.print()

    tester = Tester(topo)
    tester.tuple_count(result, 2, exact=True)

    # Submission config: trace level from the job config, SSL checks off.
    cfg = {}
    streamsx.topology.context.JobConfig(tracing='info').add(cfg)
    cfg[streamsx.topology.context.ConfigParams.SSL_VERIFY] = False

    # Run the test
    tester.test(self.test_ctxtype, cfg, always_collect_logs=True)
def main():
    """
    Sample transform application.

    Builds a topology that

    * converts a stream of string tuples from a source into integer tuples
    * adds a constant to every integer tuple
    * prints the resulting stream to stdout
    * is submitted in standalone mode (compiled and executed as a
      standalone application)

    Example:
        > python3 transform_sample.py
    Output:
        342
        474
        9342
    """
    # Container for the topology that will hold the streams.
    topo = Topology("transform_sample")

    # Source stream of string tuples.
    strings = topo.source(transform_sample_functions.int_strings_transform)

    # String tuples -> integer tuples.
    integers = strings.map(transform_sample_functions.string_to_int)

    # Add 17 to each integer tuple.
    shifted = integers.map(transform_sample_functions.AddNum(17))

    # Terminate the stream by printing each tuple to stdout.
    shifted.print()

    # Execute the application in standalone mode.
    streamsx.topology.context.submit("STANDALONE", topo)
def test_fn(self):
    """Declare one operator per ``fn_*`` helper and verify each declaration.

    Streams are checked with ``_csl_stream`` and sinks with ``_csl_sink``,
    passing the API method name and the helper name that declared it.
    """
    topo = Topology()

    s = fn_ecruos(topo)
    self._csl_stream(s, 'source', 'fn_ecruos')

    # Each helper consumes the previous stream and returns the next one.
    chained = (
        (fn_retlif, 'filter', 'fn_retlif'),
        (fn_pam, 'map', 'fn_pam'),
        (fn_pam_talf, 'flat_map', 'fn_pam_talf'),
        (fn_gnirts_sa, 'as_string', 'fn_gnirts_sa'),
        (fn_nosj_sa, 'as_json', 'fn_nosj_sa'),
    )
    for declare, api_name, helper_name in chained:
        s = declare(s)
        self._csl_stream(s, api_name, helper_name)

    subscribed = fn_ebircsbus(topo)
    self._csl_stream(subscribed, 'subscribe', 'fn_ebircsbus')

    sink = fn_hcae_rof(s)
    self._csl_sink(sink, 'for_each', 'fn_hcae_rof')

    sink = fn_hsilbup(s)
    self._csl_sink(sink, 'publish', 'fn_hsilbup')

    sink = fn_hsilbup(topo.source([]), schema=CommonSchema.Json)
    self._csl_sink(sink, 'publish', 'fn_hsilbup')

    sink = fn_tnirp(s)
    self._csl_sink(sink, 'print', 'fn_tnirp')
def main():
    """Subscribe to IoT device events, print readings and publish commands.

    Events with ``eventId == "sensors"`` are mapped to readings (printed)
    and to commands, which are converted to the command schema, published
    on the commands topic and printed. The topology is then submitted via
    ``submit_to_service`` and the resulting job id is printed.
    """
    # Topics: one to publish commands to, one to subscribe to for events.
    commands_topic = "streamsx/iot/device/commands/send"
    events_topic = "streamsx/iot/device/events"

    incoming_schema = schema.StreamSchema(
        "tuple <rstring typeId, rstring deviceId, rstring eventId,rstring jsonString>"
    )
    cmd_schema = schema.StreamSchema(
        'tuple<rstring typeId, rstring deviceId, rstring cmdId, rstring jsonString>'
    )

    topo = Topology('ReadingsFromIot')

    # Subscribe to events and keep only the sensor events.
    events = topo.subscribe(events_topic, incoming_schema)
    sensor_events = events.filter(lambda evt: evt["eventId"] == "sensors")
    sensor_events.map(get_event_data).print()

    # Build a command for each sensor event.
    cmd_stream = sensor_events.map(get_cmd)

    # Convert the commands stream to the SPL structured schema.
    commands_to_publish = cmd_stream.map(
        lambda cmd: (
            cmd["typeId"],
            cmd["deviceId"],
            cmd["cmdId"],
            cmd["jsonString"],
        ),
        schema=cmd_schema,
    )
    commands_to_publish.publish(commands_topic, cmd_schema)
    commands_to_publish.print()

    result = submit_to_service(topo)
    print("Submitted job to the service, job id = " + str(result.job.id))
def main():
    """Subscribe to IoT device events and publish commands derived from them.

    Command line:
        ``sys.argv[1]``: when equal to ``"local"`` the ``local`` flag passed
        to ``submit_to_service`` is true; when absent or different it is
        false.
        ``sys.argv[2]``, ``sys.argv[3]``: forwarded to ``submit_to_service``
        as username and password, but only when the ``local`` flag is set
        and both are present.

    Prints the id of the submitted job.
    """
    # A missing first argument means "not local" instead of an IndexError.
    local = len(sys.argv) > 1 and sys.argv[1] == "local"

    # define needed variables
    COMMANDS_TOPIC = "streamsx/iot/device/commands/send"  # topic to publish commands to
    EVENTS_TOPIC = "streamsx/iot/device/events"  # topic to subscribe to for events

    incoming_schema = schema.StreamSchema(
        "tuple <rstring typeId, rstring deviceId, rstring eventId,rstring jsonString>")
    cmd_schema = schema.StreamSchema(
        'tuple<rstring typeId, rstring deviceId, rstring cmdId, rstring jsonString>')

    topo = Topology('ReadingsFromIot')

    # Subscribe to events
    events = topo.subscribe(EVENTS_TOPIC, incoming_schema, "AllEventsAsJSON")
    sensor_events = events.filter(
        lambda evt: evt["eventId"] == "sensors", "SensorEventsAsJSON")
    readings = sensor_events.map(get_event_data, "ReadingsStream")
    readings.print()

    # send a command
    cmd_stream = sensor_events.map(get_cmd, "CommandsAsJSON")

    # convert the commands stream to a SPL structured schema
    commands_to_publish = cmd_stream.map(
        lambda x: (x["typeId"], x["deviceId"], x["cmdId"], x["jsonString"],),
        schema=cmd_schema,
        name="CommandsToPublish")
    commands_to_publish.publish(COMMANDS_TOPIC, cmd_schema)

    # Both username (argv[2]) and password (argv[3]) must be present; the
    # previous ``> 2`` guard let argv[3] raise IndexError.
    if local and len(sys.argv) > 3:
        username = sys.argv[2]
        password = sys.argv[3]
        result = submit_to_service(topo, local, username, password)
    else:
        result = submit_to_service(topo, local)

    print("Submitted job to the service, job id = " + str(result.job.id))
def main():
    """
    Find outliers in a sequence of floats (e.g. a simulated sensor reading).

    Demonstrates function logic that maintains state across tuples.

    Example:
        python3 find_outliers.py
    Example Output:
        2.753064082105016
        -2.210758753960355
        1.9847958795117937
        2.661689193901883
        2.468061723082693
        ...
    """
    topo = Topology("find_outliers")

    # Stream of random float values with a normal distribution,
    # mean 0.0 and standard deviation 1.
    readings = topo.source(find_outliers_functions.readings)

    # Keep only outliers: values more than (threshold * standard deviation)
    # away from the mean, where mean and deviation are calculated from the
    # incoming data. The threshold here is 2.0.
    # IsOutlier is a stateful functional logic class: the threshold, sum_x
    # and sum_x_squared keep their values across invocations.
    is_outlier = find_outliers_functions.IsOutlier(2.0)
    readings.filter(is_outlier).print()

    streamsx.topology.context.submit("STANDALONE", topo.graph)
def test_fetch_logs_on_failure(self):
    """Force a tester failure and check that application logs were fetched.

    The log check only runs when ``self.can_retrieve_logs`` is true; a
    downloaded log file is removed afterwards.
    """
    topo = Topology("fetch_logs_on_failure")
    stream = topo.source(["foo"])

    tester = Tester(topo)
    # The stream never contains "bar", so this condition makes the test fail.
    tester.contents(stream, ["bar"])

    self.tester = tester
    tester.local_check = self._can_retrieve_logs
    try:
        tester.test(self.test_ctxtype, self.test_config)
    except AssertionError:
        # The failure is the point of this test; nothing to do.
        pass

    # Check if logs were downloaded
    if not self.can_retrieve_logs:
        return
    log_path = tester.result['application_logs']
    downloaded = os.path.isfile(log_path)
    self.assertTrue(
        downloaded, "Application logs were not downloaded on test failure")
    if downloaded:
        os.remove(log_path)
def main():
    """
    Sample filtering echo topology application.

    Builds a simple topology that echoes its command line arguments to
    standard output, using Python functional logic to filter the tuples:
    only values that start with the letter `d` are echoed.

    Args:
        a list of values

    Example:
        python3 filter_echo.py cat dog mouse door
    Output:
        dog
        door
    """
    topo = Topology("filter_echo")
    args_stream = topo.source(sys.argv[1:])

    # The lambda is called with each tuple of the source stream. When it
    # returns True the tuple appears on the filtered stream, otherwise the
    # tuple is discarded.
    d_words = args_stream.filter(lambda value: value.startswith("d"))
    d_words.print()

    streamsx.topology.context.submit("STANDALONE", topo)
def main():
    """Window generated readings, apply ``fftpack.fft`` and print the result.

    The topology is submitted to the STANDALONE context.
    """
    topo = Topology("FFT_Sample")

    raw_readings = topo.source(signal_generator.Readings(50))
    windowed = raw_readings.transform(TumblingWindow(10))
    spectrum = windowed.transform(fftpack.fft)
    spectrum.sink(print)

    streamsx.topology.context.submit("STANDALONE", topo.graph)
def main():
    """
    Sample echo topology application.

    Builds a simple topology that echoes its command line arguments to
    standard output, following the usual pattern of declaring a topology
    and then submitting it to a Streams context.

    Args:
        a list of values to print to stdout

    Example:
        python3 echo.py hello1 hello2 hello3
    Output:
        hello1
        hello2
        hello3
    """
    topo = Topology("echo")

    # The command line arguments are captured by the SysArgv callable
    # class and become the contents of the echo stream at runtime.
    argv_source = echo_functions.SysArgv(sys.argv[1:])

    # Declare the stream and print it to stdout; this is the whole topology.
    topo.source(argv_source).print()

    # Execute the topology by submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo.graph)
def test_MQTTSink_schemas_bad(self):
    """Expect ``for_each`` with an ``MQTTSink`` to raise ``TypeError``.

    Checked for the plain source stream and for a stream mapped to the XML
    schema.
    """
    topo = Topology()
    py_stream = topo.source(['Hello', 'World!'])

    # The sink is built outside the ``with`` block so that only the
    # ``for_each`` call is expected to raise.
    sink = MQTTSink(server_uri='tcp://server:1833', topic='t1')
    with self.assertRaises(TypeError):
        py_stream.for_each(sink)

    xml_stream = py_stream.map(schema=CommonSchema.XML)
    sink = MQTTSink(server_uri='tcp://server:1833', topic='t1')
    with self.assertRaises(TypeError):
        xml_stream.for_each(sink)
def main():
    """
    Sample Hello World topology application.

    Builds a simple topology that prints Hello World to standard output,
    following the usual pattern of declaring a topology and then
    submitting it to a Streams context.

    Example:
        python3 hello_world.py
    Output:
        Hello
        World!
    """
    # Container for the topology that will hold the streams of tuples.
    topo = Topology("hello_world")

    # Source stream of string tuples, "Hello" and "World!".
    greeting = topo.source(hello_world_functions.source_tuples)

    # Sink the stream by printing each tuple to standard output.
    greeting.print()

    # The topology now consists of a single printed stream; execute it by
    # submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo.graph)
def test_dir_scan(self):
    """Build a bundle that scans the bundled ``etc`` directory for CSV files.

    ``data.csv`` (next to this test file) is added to the bundle's ``etc``
    directory. The topology scans that directory, reads matching files with
    ``CSVFilesReader`` and prints the result. Only a BUNDLE build is done;
    the produced bundle and job config files are removed afterwards.
    """
    topo = Topology()
    script_dir = os.path.dirname(os.path.realpath(__file__))
    sample_file = os.path.join(script_dir, 'data.csv')
    # add sample file to etc dir in bundle
    topo.add_file_dependency(sample_file, 'etc')

    # SPL expression evaluated at runtime: <application dir>/etc
    scan_dir = streamsx.spl.op.Expression.expression(
        'getApplicationDir()+"/etc"')
    # Raw string: '\.' is not a valid escape in a normal string literal.
    scanned = topo.source(
        files.DirectoryScan(directory=scan_dir, pattern=r'.*\.csv$'))
    r = scanned.map(
        files.CSVFilesReader(file_name='filename'),
        schema=StreamSchema('tuple<rstring a, int32 b, rstring filename>'))
    r.print()

    # creates sab file
    result = streamsx.topology.context.submit("BUNDLE", topo.graph)
    assert result.return_code == 0
    os.remove(result.bundlePath)
    os.remove(result.jobConfigPath)
def test_MQTTSink_schemas(self):
    """Check the ``dataAttributeName`` operator parameter per stream schema.

    For Json, String and Binary streams the test expects the parameter to
    be ``jsonString``, ``string`` and ``binary`` even though the sink was
    created with ``data_attribute_name='ignored'``. For ``MqttDataTuple``
    and a custom SPL tuple schema it expects the parameter to be absent
    when not given, and otherwise to equal the given name.
    """
    topo = Topology()
    pyObjStream = topo.source(['Hello', 'World!'])

    jsonStream = pyObjStream.as_json()
    # for_each() calls our populate()
    s = MQTTSink(server_uri='tcp://server:1833', topic='t1',
                 data_attribute_name='ignored')
    jsonStream.for_each(s)
    self.assertEqual(s._op.params['dataAttributeName'], 'jsonString')

    stringStream = pyObjStream.as_string()
    s = MQTTSink(server_uri='tcp://server:1833', topic='t1',
                 data_attribute_name='ignored')
    stringStream.for_each(s)
    self.assertEqual(s._op.params['dataAttributeName'], 'string')

    # The encoding must be the string 'utf-8'; the bare expression
    # ``utf-8`` would raise NameError if the lambda were ever invoked.
    binStream = pyObjStream.map(func=lambda t: bytes(t, 'utf-8'),
                                schema=CommonSchema.Binary)
    s = MQTTSink(server_uri='tcp://server:1833', topic='t1',
                 data_attribute_name='ignored')
    binStream.for_each(s)
    self.assertEqual(s._op.params['dataAttributeName'], 'binary')

    userMsgStream = pyObjStream.map(
        func=lambda t: {'data': t, 'topic_name': 't1'},
        schema=MqttDataTuple)
    s = MQTTSink(server_uri='tcp://server:1833', topic='t1')
    userMsgStream.for_each(s)
    self.assertNotIn('dataAttributeName', s._op.params)
    s = MQTTSink(server_uri='tcp://server:1833', topic='t1',
                 data_attribute_name='data')
    userMsgStream.for_each(s)
    self.assertEqual(s._op.params['dataAttributeName'], 'data')

    splMsgStream = pyObjStream.map(
        func=lambda t: {'m': t, 'k': t},
        schema='tuple<rstring m, int64 k>')
    s = MQTTSink(server_uri='tcp://server:1833', topic='t1',
                 data_attribute_name='m')
    splMsgStream.for_each(s)
    self.assertEqual(s._op.params['dataAttributeName'], 'm')
def test_score_with_feed_on_second_input_port(self):
    """Score a string stream with a model stream supplied by ``model_feed``.

    The topology is launched when the test's string form contains
    ``TestDistributed`` or ``TestStreamingAnalytics``; otherwise it is only
    built.
    """
    test_id = str(self)
    print('\n---------' + test_id)

    name = 'test_score_with_feed_on_second_input_port'
    topo = Topology(name)
    streamsx.spl.toolkit.add_toolkit(topo, self.pmml_toolkit_home)

    credentials = self._get_credentials()
    models = pmml.model_feed(
        topo,
        connection_configuration=credentials,
        model_name="sample_pmml",
        polling_period=datetime.timedelta(minutes=5),
    )

    # sample with a single model predictor field
    tuples = topo.source(['first tuple', 'second tuple']).as_string()
    out_schema = StreamSchema('tuple<rstring string, rstring result>')
    scored = pmml.score(
        tuples,
        schema=out_schema,
        model_input_attribute_mapping='p=string',
        model_stream=models,
        raw_result_attribute_name='result',
        initial_model_provisioning_timeout=datetime.timedelta(minutes=1),
    )
    scored.print()

    launchable = any(
        marker in test_id
        for marker in ("TestDistributed", "TestStreamingAnalytics")
    )
    if not launchable:
        # build only
        self._build_only(name, topo)
    else:
        self._launch(topo)
def main():
    """
    Train and apply an 'Estimator' model on generated blob data.

    The 'Estimator' model accepts a tuple with these elements:
    (type, X, y), where:
        'type': 't' (for training), 'd' (for data), '' (empty string, same as 'd')
        'X': is the data
        'y': is the actual class of the data (only used to train the model)
    """
    n_training = 100
    n_centers = 2
    n_features = 2

    topo = Topology("Estimator_Sample")

    training_blobs = sklearn_sources.Blobs(
        iterations=n_training,
        isTraining=True,
        centers=n_centers,
        n_features=n_features,
    )
    training = topo.source(training_blobs)

    data_blobs = sklearn_sources.Blobs(centers=n_centers, n_features=n_features)
    data = topo.source(data_blobs)

    # Training and data tuples go through the same estimator.
    combined = training.union({data})
    estimator = Estimator(n_training, KNeighborsClassifier())
    combined.transform(estimator).sink(print)

    streamsx.topology.context.submit("STANDALONE", topo.graph)
def _test_submit_sab(self):
    """Submit a freshly built bundle twice and verify the job responses.

    The bundle is submitted once without and once with a ``JobConfig``
    that sets the job name. Every job that was started is cancelled and
    the bundle and job config files are deleted even when an assertion
    fails, so a failing run leaves nothing behind.
    """
    topo = Topology('SabTest', namespace='mynamespace')
    topo.source([1, 2]).for_each(lambda x: None)
    bundle = streamsx.topology.context.submit('BUNDLE', topo, {})

    def submit_and_cancel(sas, expected_name=None, **submit_args):
        # Submit the bundle, check the response and always cancel the job.
        response = sas.submit_job(bundle=bundle['bundlePath'], **submit_args)
        # The response carries the job identifier under 'id' or 'jobId'.
        job_id = response.get('id', response.get('jobId'))
        self.assertIsNotNone(job_id)
        try:
            self.assertIn('application', response)
            self.assertEqual('mynamespace::SabTest', response['application'])
            self.assertIn('name', response)
            if expected_name is not None:
                self.assertEqual(expected_name, response['name'])
        finally:
            sas.cancel_job(job_id=job_id)

    try:
        self.assertIn('bundlePath', bundle)
        self.assertIn('jobConfigPath', bundle)

        sas = self.sc.get_streaming_analytics()
        submit_and_cancel(sas)

        job_name = 'SABTEST:' + str(time.time())
        job_config = streamsx.topology.context.JobConfig(job_name=job_name)
        submit_and_cancel(sas, expected_name=job_name, job_config=job_config)
    finally:
        # Remove only the files the submission reported, so a missing key
        # does not hide the original failure.
        for key in ('bundlePath', 'jobConfigPath'):
            if key in bundle:
                os.remove(bundle[key])
def test_endpoint_source(self):
    """Declare two ``EndpointSource`` streams and run the topology for 10s.

    One source uses the ``tuple<rstring x, int64 n>`` schema, the other
    ``CommonSchema.Json``. Both share the same documentation dictionaries;
    the endpoint summary is changed between the two declarations.
    """
    topo = Topology("test_endpoint_source")

    service_documentation = {
        'title': 'streamsx-sample-endpoint-sources',
        'description': '2 sources',
    }
    documentation = {
        'summary': 'Test endpoint source',
        'tags': ['Input', 'STREAMS'],
        'description': 'CPD job endpoint injects some data',
        'attributeDescriptions': {
            'x': {'description': 'IDENTIFIER'},
            'n': {'description': 'NUMBER'},
        },
    }

    schema = 'tuple<rstring x, int64 n>'
    structured = topo.source(
        EndpointSource(
            schema=schema,
            buffer_size=20000,
            service_documentation=service_documentation,
            endpoint_documentation=documentation,
        ),
        name='cpd_endpoint_src',
    )
    structured.print()

    # Same dictionary object, updated in place for the second endpoint.
    documentation['summary'] = 'Test endpoint source JSON'
    as_json = topo.source(
        EndpointSource(
            schema=CommonSchema.Json,
            service_documentation=service_documentation,
            endpoint_documentation=documentation,
        ),
        name='cpd_endpoint_src_json',
    )
    as_json.print()

    tester = Tester(topo)
    tester.run_for(10)
    tester.test(self.test_ctxtype, self.test_config)
def test_fetch_logs_on_failure(self):
    """Run a deliberately failing tester and verify the logs were downloaded.

    Verification is skipped unless ``self.can_retrieve_logs`` is true. Any
    downloaded log file is deleted at the end.
    """
    topo = Topology("fetch_logs_on_failure")
    src = topo.source(["foo"])
    tester = Tester(topo)

    # Expecting "bar" from a stream that only holds "foo" causes the failure.
    tester.contents(src, ["bar"])

    try:
        self.tester = tester
        tester.local_check = self._can_retrieve_logs
        tester.test(self.test_ctxtype, self.test_config)
    except AssertionError:
        # Expected outcome of this test, deliberately ignored.
        pass

    # Check if logs were downloaded
    if self.can_retrieve_logs:
        logs_file = tester.result['application_logs']
        have_logs = os.path.isfile(logs_file)
        self.assertTrue(have_logs, "Application logs were not downloaded on test failure")
        if have_logs:
            os.remove(logs_file)
def test_compile_MQTTSource(self):
    """Set every ``MQTTSource`` parameter and check the topology builds."""
    print('\n---------' + str(self))

    name = 'test_MQTTSource'
    topo = Topology(name)
    streamsx.spl.toolkit.add_toolkit(topo, self.mqtt_toolkit_home)

    mqtt_src = MQTTSource(
        server_uri='tcp://server:1833',
        topics=['topic1', 'topic2'],
        schema=MqttDataTuple,
    )

    # simply add all parameters; let's see if it compiles
    mqtt_src.qos = [1, 2]
    mqtt_src.message_queue_size = 122
    mqtt_src.client_id = "client-IDsrc"
    mqtt_src.reconnection_bound = 25
    mqtt_src.trusted_certs = [TRUSTED_CERT_PEM, CLIENT_CA_CERT_PEM]
    mqtt_src.client_cert = CLIENT_CERT_PEM
    mqtt_src.client_private_key = PRIVATE_KEY_PEM
    mqtt_src.ssl_protocol = 'TLSv1.1'
    mqtt_src.vm_arg = ["-Xmx13G"]
    mqtt_src.ssl_debug = True
    mqtt_src.app_config_name = "abbconf2"
    mqtt_src.command_timeout_millis = 30000
    mqtt_src.keep_alive_seconds = 65
    mqtt_src.password = "******"
    mqtt_src.username = "******"
    # Assigned a second time on purpose, as in the original sequence.
    mqtt_src.app_config_name = "mqtt_app_cfg"

    topo.source(mqtt_src, name='MqttStream').print()

    # build only
    self._build_only(name, topo)
def test_source(self):
    """Check the output port schema of ``topo.source`` for each source callable."""
    topo = Topology()

    # (source callable, schema expected on the stream's output port)
    cases = (
        (s_none, CommonSchema.Python),
        (s_int, CommonSchema.Python),
        (s_str, CommonSchema.String),
        (s_any, CommonSchema.Python),
        (s_sensor, _normalize(SensorReading)),
        (s_str_it, CommonSchema.String),
        (s_p, CommonSchema.Python),
        (s_s, CommonSchema.Python),
    )
    for source_fn, expected_schema in cases:
        stream = topo.source(source_fn)
        self.assertEqual(expected_schema, stream.oport.schema)
def main():
    """
    Sample Hello World topology application.

    Builds a simple topology that prints Hello World to standard output,
    following the usual pattern of declaring a topology and then
    submitting it to a Streams context.

    Example:
        python3 hello_world.py
    Output:
        Hello
        World!
    """
    # Container for the topology that will hold the streams of tuples.
    topo = Topology("hello_world")

    # Source stream holding the two string tuples "Hello" and "World!",
    # sunk by printing each tuple to standard output.
    words = ["Hello", "World!"]
    topo.source(words).print()

    # The topology now consists of a single printed stream; execute it by
    # submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo)
def main():
    """
    Sample filtering echo topology application.

    Builds a simple topology that echoes its command line arguments to
    standard output, using a user-defined function to filter the tuples:
    only values that start with the letter `d` are echoed.

    Args:
        a list of values

    Example:
        python3 filter_echo.py cat dog mouse door
    Output:
        dog
        door
    """
    topo = Topology("filter_echo")
    argv_source = filter_echo_functions.SysArgv(sys.argv[1:])
    echoed = topo.source(argv_source)

    # `starts_with_d` is called for each tuple on the source stream. When
    # it returns True the tuple appears on the filtered stream, otherwise
    # the tuple is discarded.
    d_words = echoed.filter(filter_echo_functions.starts_with_d)
    d_words.print()

    streamsx.topology.context.submit("STANDALONE", topo.graph)
def test_scikit_learn(self):
    """Verify basic scikit-learn tutorial code works as a stream."""
    digits = datasets.load_digits()
    held_out = digits.data[-10:]

    # Train on everything except the last ten samples.
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Predictions computed locally for the ten held-out samples.
    expected = [clf.predict(sample.reshape(1, -1))[0] for sample in held_out]

    topo = Topology()
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    images = topo.source(digits.data[-10:], name='Images')
    predicted = images.map(
        lambda image: clf.predict(image.reshape(1, -1))[0],
        name='Predict Digit')

    tester = Tester(topo)
    tester.contents(predicted, expected)
    tester.tuple_count(predicted, 10)
    tester.test(self.test_ctxtype, self.test_config)
def test_maintain_hints(self):
    """Check ``map`` type checks on streams derived from a string source.

    On the source stream and on its autonomous, low-latency,
    end-low-latency, parallel and end-parallel derivatives, ``map(m_str)``
    is expected to be accepted and ``map(m_sensor)`` to raise ``TypeError``.
    """
    topo = Topology()
    source = topo.source(s_str)

    source.map(m_str)
    with self.assertRaises(TypeError):
        source.map(m_sensor)

    derived = source.autonomous()
    derived.map(m_str)
    with self.assertRaises(TypeError):
        derived.map(m_sensor)

    derived = source.low_latency()
    derived.map(m_str)
    with self.assertRaises(TypeError):
        derived.map(m_sensor)

    derived = derived.end_low_latency()
    derived.map(m_str)
    with self.assertRaises(TypeError):
        derived.map(m_sensor)

    parallel = source.parallel(width=3)
    mapped = parallel.map(m_str).as_string()
    with self.assertRaises(TypeError):
        parallel.map(m_sensor)

    merged = mapped.end_parallel()
    merged.map(m_str)
    with self.assertRaises(TypeError):
        merged.map(m_sensor)
def test_image_name_image_tag(self):
    """Submit to the EDGE context with an image name and tag in the overlay.

    The job config overlay's ``edgeConfig`` carries the image name, image
    tag, pip packages and rpms. The submission result is expected to
    report a non-blank image and image digest, with the image containing
    both the requested name and tag.

    The test is skipped when the submission raises ``RuntimeError``. Only
    the submission is guarded, so an error raised while checking the
    result is reported instead of being turned into a skip.
    """
    topo = Topology("test_image_name_image_tag")
    heartbeat = topo.source(lambda: itertools.count())
    heartbeat.print()

    image_name = 'py-tst'
    image_tag = 'v1.0'

    cfg = {ConfigParams.SSL_VERIFY: False}
    jc = JobConfig()
    jc.raw_overlay = {
        'edgeConfig': {
            'imageName': image_name,
            'imageTag': image_tag,
            'pipPackages': ['pandas', 'numpy'],
            'rpms': ['atlas-devel'],
        }
    }
    jc.add(cfg)

    try:
        submission_result = submit(ContextTypes.EDGE, topo.graph, cfg)
    except RuntimeError as e:
        print(str(e))
        self.skipTest("Skip test, CPD does not support EDGE.")

    print(str(submission_result))
    self.assertIsNotNone(submission_result)
    self.assertTrue(self._is_not_blank(submission_result.image))
    self.assertTrue(self._is_not_blank(submission_result.imageDigest))
    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn(image_name, submission_result.image)
    self.assertIn(image_tag, submission_result.image)
def test_endpoint_sink(self):
    """Send a counting string stream to an ``EndpointSink`` and run for 10s.

    The sink is given endpoint and service documentation dictionaries; at
    least 10 tuples are expected on the source stream.
    """
    topo = Topology("test_endpoint_sink")
    stream1 = topo.source(lambda: itertools.count()).as_string()

    endpoint_documentation = {
        'summary': 'Sample endpoint sink',
        'tags': ['Output'],
        'description': 'Streams job endpoint emits some data with random numbers',
        'attributeDescriptions': {
            'string': {'description': 'number incremented by one'},
        },
    }

    service_documentation = {
        'title': 'streamsx-sample-endpoint-sink',
        'description': 'NUMBER GENERATOR',
        'version': '0.1.0',
        'externalDocsUrl': 'https://mycompany.com/numgen/doc',
        'externalDocsDescription': 'Number generator documentation',
        'tags': {
            'Output': {
                'description': 'Output tag description',
                'externalDocs': {
                    'url': 'https://mycompany.com/numgen/input/doc',
                    'description': 'Output tag external doc description',
                },
            },
        },
    }

    sink = EndpointSink(
        buffer_size=50000,
        endpoint_documentation=endpoint_documentation,
        service_documentation=service_documentation,
    )
    stream1.for_each(sink, name='cpd_endpoint_sink')

    tester = Tester(topo)
    tester.tuple_count(stream1, 10, exact=False)
    tester.run_for(10)
    tester.test(self.test_ctxtype, self.test_config)
def main():
    """
    Sample transform application.

    Builds a topology that

    * transforms a stream of string tuples from a source operator into a
      stream of integer tuples
    * uses `transform` to perform addition on the integer tuples
    * prints the stream to stdout
    * is submitted in standalone mode (compiled and executed as a
      standalone application)

    Example:
        > python3 transform_sample.py
    Output:
        342
        474
        9342
    """
    # Container for the topology that will hold the streams.
    topo = Topology("transform_sample")

    # Source stream of string tuples.
    strings = topo.source(transform_sample_functions.int_strings_transform)

    # String tuples -> integer tuples.
    integers = strings.transform(transform_sample_functions.string_to_int)

    # Add 17 to each integer tuple.
    add_17 = transform_sample_functions.AddNum(17)
    shifted = integers.transform(add_17)

    # Terminate the stream by printing each tuple to stdout.
    shifted.print()

    # Execute the application in standalone mode.
    streamsx.topology.context.submit("STANDALONE", topo.graph)
def test_to_avro_params(self):
    """Call ``avro.json_to_avro`` with each message-sizing parameter variant.

    Covers ``tuples_per_message``, ``bytes_per_message`` and
    ``time_per_message`` given as a ``timedelta``, an int and a float.
    """
    topo = Topology()
    json_stream = topo.source(JsonData('a', 1)).as_json()

    # One extra keyword argument per call, in the original order.
    variants = (
        {'tuples_per_message': 1000},
        {'bytes_per_message': 1024},
        {'time_per_message': datetime.timedelta(seconds=5)},
        {'time_per_message': 5},
        {'time_per_message': 15.0},
    )
    for sizing in variants:
        avro.json_to_avro(
            json_stream,
            avro_test_schema_file(),
            embed_avro_schema=True,
            **sizing
        )
def test_no_python_schema(self):
    """Expect ``TypeError`` when endpoint operators meet the Python schema."""
    topo = Topology('test_no_python_schema')

    # EndpointSource does not support Python schema, expect TypeError
    with self.assertRaises(TypeError):
        EndpointSource(schema=CommonSchema.Python)

    # EndpointSink does not support Python schema, expect TypeError
    counter_stream = topo.source(lambda: itertools.count())
    with self.assertRaises(TypeError):
        counter_stream.for_each(EndpointSink())
def main():
    """Build and submit the FFT sample topology in standalone mode.

    Generated readings are grouped by ``TumblingWindow(10)``, passed
    through ``fftpack.fft`` and printed.
    """
    topology = Topology("FFT_Sample")

    generator = signal_generator.Readings(50)
    windows = topology.source(generator).transform(TumblingWindow(10))
    windows.transform(fftpack.fft).sink(print)

    streamsx.topology.context.submit("STANDALONE", topology.graph)
def test_sequence(self):
    """Expect 122 tuples from ``U.Sequence``, each with ``seq`` and ``ts``."""
    topo = Topology()
    sequence = topo.source(U.Sequence(iterations=122))

    tester = Tester(topo)
    tester.tuple_check(sequence, lambda tpl: 'seq' in tpl and 'ts' in tpl)
    tester.tuple_count(sequence, 122)
    tester.test(self.test_ctxtype, self.test_config)
def test_source_argcount(self):
    """Check which source callables ``topo.source`` accepts.

    ``a_0``/``A_0()`` and ``ao_1``/``AO_1()`` are expected to be accepted;
    ``a_1``/``A_1()`` are expected to raise ``TypeError``.
    """
    topo = Topology()

    topo.source(a_0)
    topo.source(A_0())

    with self.assertRaises(TypeError):
        topo.source(a_1)
    # Instantiate outside the ``with`` block so that only ``topo.source``
    # is expected to raise.
    one_arg_instance = A_1()
    with self.assertRaises(TypeError):
        topo.source(one_arg_instance)

    topo.source(ao_1)
    topo.source(AO_1())
def test_creds(self):
    """Call ``evstr.publish`` with credentials as a loaded object and as a string.

    The object is loaded from the JSON file named by the
    ``EVENTSTREAMS_CREDENTIALS`` environment variable.
    """
    with open(os.environ['EVENTSTREAMS_CREDENTIALS']) as creds_fp:
        credentials = json.load(creds_fp)

    topo = Topology()
    json_stream = topo.source(['Hello', 'World']).as_json()

    for creds in (credentials, 'eventstreams'):
        evstr.publish(json_stream, 'Topic', credentials=creds)
def test_get_job(self):
    """Run a one-tuple topology with ``_correct_job_ids`` as the local check."""
    topo = Topology("job_in_result_test")
    topo.source(["foo"])

    self.tester = Tester(topo)
    self.tester.local_check = self._correct_job_ids
    self.tester.test(self.test_ctxtype, self.test_config)
def main():
    """Convolve windowed readings with a reference signal and print them.

    The reference signal is ``signal.hann(10)``; readings are grouped by
    ``TumblingWindow(20)``. The topology is submitted to STANDALONE.
    """
    reference = signal.hann(10)

    topo = Topology("Convolve_Sample")
    raw_readings = topo.source(signal_generator.Readings(100))
    windowed = raw_readings.transform(TumblingWindow(20))
    convolved = windowed.transform(signal_functions.Convolve(reference))
    convolved.sink(print)

    streamsx.topology.context.submit("STANDALONE", topo.graph)
def main():
    """Apply a ``butterworth.Lowpass`` filter to windowed readings and print them.

    The filter is created with order 4, cutoff frequency 100 and sample
    rate 1000; readings are grouped by ``TumblingWindow(2000)``.
    """
    order = 4
    cutoff = 100
    rate = 1000

    topo = Topology("LowpassFilter_Sample")
    raw_readings = topo.source(signal_generator.Readings(50000))
    windowed = raw_readings.transform(TumblingWindow(2000))
    lowpass = butterworth.Lowpass(order, cutoff, rate)
    windowed.transform(lowpass).sink(print)

    streamsx.topology.context.submit("STANDALONE", topo.graph)
def main():
    """
    Sample continuous (streaming) grep topology application.

    Builds a simple topology that periodically polls a directory for
    files, reads each file and outputs the lines that contain the search
    term. As each file is added to the directory, the application reads it
    and outputs the matching lines.

    Args:
        directory (string): a directory that contains files to process
        search_string (string): a search term

    Example:
        * Create a subdirectory "dir"
        * Create file1.txt in subdirectory "dir" with the following contents:
            file1 line1
            file1 line2
            file1 line3
        * Create file2.txt in subdirectory "dir" with the following contents:
            file2 line1
            file2 line2
            file2 line3
        * python3 grep.py dir line2

    Output:
        file1 line2
        file2 line2
    """
    if len(sys.argv) != 3:
        print("Usage: python3 grep.py <directory> <search_string>")
        return

    _, directory, term = sys.argv

    topo = Topology("grep")

    # Stream with the contents of the files. For each input file,
    # DirectoryWatcher opens it and reads it as text, producing one string
    # tuple per line.
    watcher = util_functions.DirectoryWatcher(directory)
    lines = topo.source(watcher)

    # FilterLine is a callable class executed for each line tuple; only
    # lines that contain `term` reach the output stream.
    matching = lines.filter(grep_functions.FilterLine(term))

    # Print the matching lines to standard out.
    matching.print()

    # Execute the topology.
    streamsx.topology.context.submit("STANDALONE", topo)
def test_get_job(self):
    """
    Run a one-tuple topology through the Tester using an explicitly
    supplied StreamsConnection, with ``self._correct_job_ids`` installed
    as the tester's local check.
    """
    topology = Topology("job_in_result_test")
    topology.source(["foo"])

    # Connection built from the test's credentials; verification on the
    # underlying session is switched off (presumably TLS certificate
    # verification -- confirm against rest.StreamsConnection).
    connection = rest.StreamsConnection(username=self.username,
                                        password=self.password)
    connection.session.verify = False
    submit_config = {ConfigParams.STREAMS_CONNECTION: connection}

    job_tester = Tester(topology)
    # The tester is stored on the test case before the local check is set.
    self.tester = job_tester
    job_tester.local_check = self._correct_job_ids
    job_tester.test(self.test_ctxtype, submit_config)
def test_always_fetch_logs(self):
    """
    Run a passing test with ``always_collect_logs=True`` and verify that
    the path in ``tester.result['application_logs']`` is an existing
    file, removing it afterwards.
    """
    topology = Topology("always_fetch_logs")
    stream = topology.source(["foo"])

    log_tester = Tester(topology)
    log_tester.contents(stream, ["foo"])
    log_tester.test(self.test_ctxtype, self.test_config,
                    always_collect_logs=True)

    # The logs must have been downloaded even though the test succeeded.
    log_path = log_tester.result['application_logs']
    downloaded = os.path.isfile(log_path)
    self.assertTrue(downloaded, "Application logs were not downloaded on test success")

    # Do not leave the downloaded logs behind.
    if downloaded:
        os.remove(log_path)
def main():
    """
    Estimator sample topology application.

    The 'Estimator' model accepts a tuple with these elements:
    (type, X, y), where:
        'type': 't' (for training), 'd' (for data),
                '' (empty string, same as 'd')
        'X': is the data
        'y': is the actual class of the data (only used to train the model)

    A training stream and a data stream are unioned, passed through an
    Estimator wrapping a KNeighborsClassifier, and the result is printed.
    """
    n_training = 100
    n_centers = 2
    n_features = 2

    topology = Topology("Estimator_Sample")

    # Training tuples: a Blobs source flagged with isTraining=True.
    training_blobs = sklearn_sources.Blobs(iterations=n_training,
                                           isTraining=True,
                                           centers=n_centers,
                                           n_features=n_features)
    training = topology.source(training_blobs)

    # Data tuples: a Blobs source with the same centers/features.
    data_blobs = sklearn_sources.Blobs(centers=n_centers,
                                       n_features=n_features)
    data = topology.source(data_blobs)

    # Merge both streams and feed them to the estimator.
    combined = training.union({data})
    estimator = Estimator(n_training, KNeighborsClassifier())
    predictions = combined.transform(estimator)
    predictions.sink(print)

    streamsx.topology.context.submit("STANDALONE", topology.graph)
def run(self, context="DISTRIBUTED"):
    """
    Build the "HealthcareDemo" topology and submit it.

    The topology subscribes to the "ingest-physionet" topic using the
    JSON common schema, filters the tuples with a PatientFilter built
    from ``self.patient_id``, applies GenTimestamp and a SlidingWindow
    (both parameterised by ``self.sample_rate``) and aggregates the
    result. The aggregated stream is passed to
    ``patientmonitoring_functions.streaming_rpeak`` and a view of that
    function's output stream is stored in ``self.view_data``.

    Args:
        context: Name of the submission context handed to
            ``streamsx.topology.context.submit``. Defaults to
            "DISTRIBUTED".
    """
    ## Create topology
    topo = Topology("HealthcareDemo")

    ## Ingest, preprocess and aggregate patient data
    # The chain is parenthesised instead of using backslash continuations:
    # the previous form ended with a dangling "\" after the last call,
    # which silently spliced whatever physical line came next onto this
    # statement.
    patient_data = (
        topo.subscribe("ingest-physionet", schema.CommonSchema.Json)
        .map(functions.identity)
        .filter(healthcare_functions.PatientFilter(self.patient_id))
        .transform(healthcare_functions.GenTimestamp(self.sample_rate))
        .transform(SlidingWindow(length=self.sample_rate,
                                 trigger=self.sample_rate - 1))
        .transform(healthcare_functions.aggregate)
    )

    ## Calculate RPeak and RR delta
    rpeak_data_stream = patientmonitoring_functions.streaming_rpeak(
        patient_data, self.sample_rate, data_label='ECG Lead II')

    ## Create a view of the data
    self.view_data = rpeak_data_stream.view()

    ## Compile Python Streams application and submit job
    streamsx.topology.context.submit(context, topo.graph,
                                     username=self.username,
                                     password=self.password)
def test_always_fetch_logs(self):
    """
    Run a passing test with ``always_collect_logs=True`` and, when
    ``self.can_retrieve_logs`` is set, verify that the path in
    ``tester.result['application_logs']`` is an existing file, removing
    it afterwards.
    """
    topology = Topology("always_fetch_logs")
    stream = topology.source(["foo"])

    log_tester = Tester(topology)
    log_tester.contents(stream, ["foo"])
    self.tester = log_tester
    # NOTE(review): presumably this local check is what sets
    # self.can_retrieve_logs -- confirm against _can_retrieve_logs.
    log_tester.local_check = self._can_retrieve_logs
    log_tester.test(self.test_ctxtype, self.test_config,
                    always_collect_logs=True)

    # Per the original note, fetching logs is only supported when the
    # Streams version is >= 4.2.4; otherwise there is nothing to check.
    if not self.can_retrieve_logs:
        return

    # The logs must have been downloaded even though the test succeeded.
    log_path = log_tester.result['application_logs']
    downloaded = os.path.isfile(log_path)
    self.assertTrue(downloaded, "Application logs were not downloaded on test success")

    # Do not leave the downloaded logs behind.
    if downloaded:
        os.remove(log_path)
def main():
    """
    Plays Fizz Buzz (https://en.wikipedia.org/wiki/Fizz_buzz)

    Example:
        python3 fizz_buzz.py

    Output:
        1
        2
        Fizz!
        4
        Buzz!
        Fizz!
        7
        8
        Fizz!
        Buzz!
        11
        Fizz!
        13
        14
        FizzBuzz!
        ...
    """
    topology = Topology("fizz_buzz")

    # Stream of int values produced by fizz_buzz_functions.int_tuples.
    numbers = topology.source(fizz_buzz_functions.int_tuples)

    # Apply the game to the stream and print each resulting tuple to
    # standard output.
    played = play_fizz_buzz(numbers)
    played.print()

    # Up to here the streaming topology has only been declared; no data
    # flows until it is submitted to a context. Submit it to a
    # standalone context to execute it.
    streamsx.topology.context.submit("STANDALONE", topology.graph)
def test_get_job(self):
    """
    Run a one-tuple topology with ``self._correct_job_ids`` as the local
    check, then verify that the submission result carries a
    'submitMetrics' mapping whose four build/submit metrics are present
    and greater than zero.
    """
    topology = Topology("job_in_result_test")
    topology.source(["foo"])

    job_tester = Tester(topology)
    # The tester is stored on the test case before the local check is set.
    self.tester = job_tester
    job_tester.local_check = self._correct_job_ids
    job_tester.test(self.test_ctxtype, self.test_config)

    result = job_tester.submission_result
    self.assertIn('submitMetrics', result)
    metrics = result['submitMetrics']

    metric_names = (
        'buildArchiveSize',
        'buildArchiveUploadTime_ms',
        'totalBuildTime_ms',
        'jobSubmissionTime_ms',
    )
    # Presence is checked for every metric before any value is inspected.
    for metric in metric_names:
        self.assertIn(metric, metrics)
    for metric in metric_names:
        self.assertTrue(metrics[metric] > 0)
def test_scikit_learn(self):
    """Check that the basic scikit-learn digits tutorial runs as a stream."""
    # Train on everything except the last ten images.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Local predictions for the ten held-out images are the expected
    # stream contents.
    expected = [clf.predict(sample.reshape(1, -1))[0]
                for sample in digits.data[-10:]]

    topology = Topology()
    topology.add_pip_package('scikit-learn')
    topology.exclude_packages.add('sklearn')

    # Stream the held-out images and predict a digit for each one.
    image_stream = topology.source(digits.data[-10:], name='Images')
    predicted = image_stream.map(
        lambda image : clf.predict(image.reshape(1,-1))[0],
        name='Predict Digit')

    stream_tester = Tester(topology)
    stream_tester.contents(predicted, expected)
    stream_tester.tuple_count(predicted, 10)
    stream_tester.test(self.test_ctxtype, self.test_config)
def test_class(self):
    """
    Invoke every CallTopo method in turn and check each result.

    Each CallTopo method is named after a topology API call spelled
    backwards (e.g. ``ecruos`` for ``source``). Results are handed to
    ``self._csl_stream`` or ``self._csl_sink`` together with the API
    name, the CallTopo method name and the class name 'CallTopo'.
    """
    topology = Topology()
    caller = CallTopo()

    # Stream-producing calls, each applied to the previous result.
    stream = caller.ecruos(topology)
    self._csl_stream(stream, 'source', 'ecruos', cls='CallTopo')

    stream = caller.retlif(stream)
    self._csl_stream(stream, 'filter', 'retlif', cls='CallTopo')

    stream = caller.pam(stream)
    self._csl_stream(stream, 'map', 'pam', cls='CallTopo')

    stream = caller.pam_talf(stream)
    self._csl_stream(stream, 'flat_map', 'pam_talf', cls='CallTopo')

    stream = caller.gnirts_sa(stream)
    self._csl_stream(stream, 'as_string', 'gnirts_sa', cls='CallTopo')

    stream = caller.nosj_sa(stream)
    self._csl_stream(stream, 'as_json', 'nosj_sa', cls='CallTopo')

    # Subscription is created from the topology, not from the chain.
    subscribed = caller.ebircsbus(topology)
    self._csl_stream(subscribed, 'subscribe', 'ebircsbus', cls='CallTopo')

    # Sink-producing calls, all applied to the final chained stream.
    sink = caller.hcae_rof(stream)
    self._csl_sink(sink, 'for_each', 'hcae_rof', cls='CallTopo')

    sink = caller.hsilbup(stream)
    self._csl_sink(sink, 'publish', 'hsilbup', cls='CallTopo')

    # Publish again, this time with an implicit schema change.
    sink = caller.hsilbup(topology.source([]), schema=CommonSchema.Json)
    self._csl_sink(sink, 'publish', 'hsilbup', cls='CallTopo')

    sink = caller.tnirp(stream)
    self._csl_sink(sink, 'print', 'tnirp', cls='CallTopo')
def main():
    """
    Sample temperature sensor topology application.

    Builds a simple topology that prints an infinite stream of random
    numbers to standard output. It follows the typical pattern of
    declaring a topology and then submitting it to a Streams context.

    Example:
        python3 temperature_sensor.py

    Output:
        ...
        0.3235259780332219
        1.7694181431337437
        0.27741668353194443
        -0.18827948813268522
        0.9576092897071428
        -0.8918033752738117
        -1.4946580133821907
        ...
        (Ctrl-C to exit)
    """
    # The topology is the container holding the streams of tuples.
    topology = Topology("temperature_sensor")

    # Infinite stream of random numbers.
    readings = topology.source(temperature_sensor_functions.readings)

    # Terminate the stream by printing every tuple to standard output.
    readings.print()

    # Execute the topology in a standalone context.
    streamsx.topology.context.submit("STANDALONE", topology.graph)
def main():
    """
    Sample continuous (streaming) regular expression grep topology application.

    Builds a simple topology that periodically polls a directory for
    files, reads each file and outputs the lines that match a regular
    expression. The matching is done on a stream parallelized into 5
    parallel channels. Tuples are routed to parallel channels such that
    an even distribution is maintained.

    Command line arguments:
        directory (string): a directory that contains files to process
        search_pattern (string): a search pattern

    Example:
        * In addition to including the
          `com.ibm.streamsx.topology/opt/python/packages` directory in the
          PYTHONPATH environment variable, also include the
          `samples/python/topology/simple` directory.
        * Create a subdirectory "dir"
        * Create file1.txt in subdirectory "dir" with the following contents:
            file1 line1
            file1 line2
            file1 line3
        * Create file2.txt in subdirectory "dir" with the following contents:
            file2 line1
            file2 line2
            file2 line3
        * python3 parallel_regex_grep.py dir line[1-2]

    Example Output (intermixed):
        file2 line1
        file2 line2
        file1 line1
        file1 line2
        LineCounter@139676451944432 has sent ...
        LineCounter@139676451944432 has sent 6 lines to be filtered.
        1. FilterLine@139676451362072 has received 1 lines on this parallel channel.
        2. FilterLine@139676441656064 has received 1 lines on this parallel channel.
        3. FilterLine@139676441211568 has received 1 lines on this parallel channel.
        4. FilterLine@139676441211848 has received 1 lines on this parallel channel.
        5. FilterLine@139676441655728 has received ...
           FilterLine@139676441655728 has received 2 lines on this parallel channel.

    Reading the example output:
        * The source operator produced a total of 6 tuples.
        * 5 filter operators are created, one for each parallel channel.
        * 4 operators processed 1 tuple each; 1 operator processed 2 tuples.
    """
    # Exactly two arguments are required after the script name.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Usage: python3 parallel_regex_grep.py <directory> <search_pattern>")
        return
    directory, pattern = cli_args

    topology = Topology("parallel_regex_grep")

    # Stream of string tuples: every file in the directory is read and
    # each line of text becomes one tuple.
    watcher = util_functions.DirectoryWatcher(directory)
    lines = topology.source(watcher)

    # Count all lines before they are distributed over the parallel
    # channels.
    counter = parallel_regex_grep_functions.LineCounter()
    counted = lines.transform(counter)

    # Split the stream into 5 channels. Each channel should therefore see
    # roughly a fifth of the lines sent, which can be checked by comparing
    # the LineCounter output with the per-channel FilterLine output.
    channels = counted.parallel(5)

    # Match lines against the pattern in parallel; FilterLine also reports
    # how many strings it has tested.
    matcher = parallel_regex_grep_functions.FilterLine(pattern)
    matched = channels.filter(matcher)

    # Merge the parallel channels back into a single stream.
    merged = matched.end_parallel()

    # Print the combined results.
    merged.print()

    # Execute the topology.
    streamsx.topology.context.submit("STANDALONE", topology.graph)
def main():
    """
    Introduction to streaming with scikit-learn.

    Adapts the scikit-learn basic tutorial to a streaming environment,
    where events arrive continually and as individual items. Here the
    digit prediction example is adapted to predict a digit as each image
    arrives.

    The prediction model is trained locally on the example digits
    dataset, while the runtime prediction of images occurs in the IBM
    Cloud using the Streaming Analytics service.

    The original scikit-learn tutorial is at:
    http://scikit-learn.org/stable/tutorial/basic/tutorial.html
    """
    # Train the model locally, holding back the last ten images.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Begin declaring the streaming application.
    topology = Topology(namespace='ScikitLearn', name='Images')

    # scikit-learn is required when running on the service.
    topology.add_pip_package('scikit-learn')
    topology.exclude_packages.add('sklearn')

    # Endlessly cycle through the ten held-back images; every tuple on
    # the stream is a single image.
    held_back = digits.data[-10:]
    image_stream = topology.source(itertools.cycle(held_back), name='Images')

    # map() declares a new stream whose tuples are the results of
    # applying a function to each tuple of the input stream. The lambda
    # below returns a dictionary holding the image and the digit that
    # the model predicts for it.
    #
    # The lambda captures the model (clf); it is pickled (using dill) so
    # that it can be used on the service, which runs in IBM Cloud.
    predicted = image_stream.map(
        lambda image : {'image':image, 'digit':clf.predict(image.reshape(1,-1))[0]},
        name='Predict Digit')

    predicted.for_each(lambda x : None, name='Noop')

    # The topology now fully declares the streaming application. To run,
    # it has to be submitted to an execution context -- here an instance
    # of the Streaming Analytics service running on IBM Cloud.
    submission_result = streamsx.topology.context.submit(
        'STREAMING_ANALYTICS_SERVICE', topology)
    print(submission_result)
def test_keep_schema_string(self):
    """Pass the ``as_string()`` view of an empty source to ``_check_kept``."""
    topology = Topology()
    empty_source = topology.source([])
    string_stream = empty_source.as_string()
    self._check_kept(string_stream)
def test_keep_schema_json(self):
    """Pass the ``as_json()`` view of an empty source to ``_check_kept``."""
    topology = Topology()
    empty_source = topology.source([])
    json_stream = empty_source.as_json()
    self._check_kept(json_stream)
def main():
    """
    This is a variant of images.py that loads the model from a file.

    Here the Streams application is declared using a model contained in
    a file. This is a typical pattern where the model is created
    off-line and saved to a file. Subsequently applications load the
    file to perform predictions.

    Comments are mainly focused on the model loading, see images.py for
    details on other statements.

    The local model file ('digitmodel.pkl' in the current directory) is
    removed before returning, whether or not declaring or submitting the
    topology succeeds.

    http://scikit-learn.org/stable/modules/model_persistence.html
    """
    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Persist the model as a file
    model_file = 'digitmodel.pkl'
    joblib.dump(clf, model_file)

    # From here on the model file exists locally; the try/finally makes
    # sure it is removed even when a later step raises (previously a
    # failed submission left the file behind).
    try:
        # Just to ensure we are not referencing the local
        # instance of the model, we will load the model at
        # runtime from the file.
        clf = None

        topo = Topology(namespace='ScikitLearn', name='ImagesModelFile')
        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

        # Add the model to the topology. This will take a copy
        # of the file and make it available when the job
        # is running. The returned path is relative to the
        # job's application directory. See DigitPredictor() for
        # how it is used.
        model_path = topo.add_file_dependency(model_file, 'etc')

        # Predict the digit from the image using the trained model.
        # The map method declares a stream (images_digits) that is
        # the result of applying a function to each tuple on its
        # input stream (images)
        #
        # At runtime we need to load the model from the file so instead
        # of a stateless lambda function we use an instance of a class.
        # This class (DigitPredictor) has the model path as its state
        # and will load the model from the file when the job is executing
        # in the IBM Cloud.
        images_digits = images.map(DigitPredictor(model_path), name='Predict Digit')

        images_digits.for_each(lambda x : None, name='Noop')

        # Note at this point topo represents the declaration of the
        # streaming application that predicts digits from images.
        # It must be submitted to an execution context, in this case
        # an instance of Streaming Analytics service running on IBM Cloud.
        sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
        print(sr)
    finally:
        # Clean up, the running job has its own copy of the model file
        os.remove(model_file)
def test_keep_schema_schema(self):
    """
    Pass a stream mapped from an empty source with the explicit schema
    'tuple<rstring a, int32 b>' to ``_check_kept``.
    """
    topology = Topology()
    empty_source = topology.source([])
    structured = empty_source.map(lambda x : x,
                                  schema='tuple<rstring a, int32 b>')
    self._check_kept(structured)