def test_scikit_learn(self):
    """Check that the basic scikit-learn tutorial code works as a stream.

    An SVC is fitted on every digit image except the last ten. The ten
    held-out images are classified locally to build the expected results,
    then sent through a topology whose map step performs the same
    prediction. The tester checks both the stream contents and the
    tuple count.
    """
    digits = datasets.load_digits()
    holdout = digits.data[-10:]

    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Reference predictions, computed locally one image at a time.
    expected = [clf.predict(img.reshape(1, -1))[0] for img in holdout]

    topo = Topology()
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    images = topo.source(holdout, name='Images')
    images_digits = images.map(
        lambda image: clf.predict(image.reshape(1, -1))[0],
        name='Predict Digit')

    tester = Tester(topo)
    tester.contents(images_digits, expected)
    tester.tuple_count(images_digits, 10)
    tester.test(self.test_ctxtype, self.test_config)
def monitor(job_name, name_space, redis_base=None, topic=None):
    """Declare the topology that turns ship/container events into problem notices.

    Ship and container tuples are subscribed from Message Hub as JSON,
    tagged with their origin, merged, consolidated and passed through
    ``augment_weather``. Three filters (heatwave, unit down, fire) select
    problem tuples; each is formatted, the results are merged and
    published as JSON to the problem topic.

    Args:
        job_name: Name given to the topology.
        name_space: Namespace given to the topology.
        redis_base: Currently unused; only the disabled Redis sink
            (kept as a comment below) refers to it.
        topic: Mapping with the keys ``'ship'``, ``'container'`` and
            ``'problem'`` naming the Message Hub topics. When omitted,
            the bluewater topic names are used.

    Returns:
        The declared ``Topology``; it is not submitted here.
    """
    # A fresh dict per call avoids sharing one mutable default object
    # between every invocation of this function.
    if topic is None:
        topic = {
            'ship': 'bluewaterShip',
            'container': 'bluewaterContainer',
            'problem': 'bluewaterProblem',
        }

    topology = Topology(job_name, name_space)
    topology.add_pip_package('streamsx.messagehub')

    # fetch and tag tuples
    shipMh = streamsx.messagehub.subscribe(
        topology, schema=CommonSchema.Json, topic=topic['ship'], name="shipMH")
    shipMh = shipMh.map(TagTuple("ship"), name="shipTag")
    containerMh = streamsx.messagehub.subscribe(
        topology, schema=CommonSchema.Json, topic=topic['container'],
        name="containerMH")
    containerMh = containerMh.map(TagTuple("container"), name="containerTag")

    # normalize the tuples
    interLeaved = shipMh.union({containerMh})
    consolidated = interLeaved.map(Consolidate(), name="consolidate")
    complete = consolidated.map(augment_weather, name="weatherAugment")
    complete.print(tag="complete")

    # process the data
    heatwaveFiltered = complete.filter(Heatwave(), name="heatwaveTest")
    formatHeatwave = heatwaveFiltered.map(format_heatwave, name="heatwaveFmt")

    unitDownFiltered = complete.filter(UnitDown(), name="downTest")
    formatDown = unitDownFiltered.map(format_unitDown, name="downFmt")

    # A reading above 200.0 in 'tempC' is treated as a fire.
    fireFiltered = complete.filter(lambda t: t['tempC'] > 200.0, name="fireTest")
    formatFire = fireFiltered.map(format_fire, name="fireFmt")

    # consolidate notification - redis + messagehub
    formatted = formatFire.union({formatDown, formatHeatwave})
    # Redis notification is disabled for now:
    # formatted.sink(TransmitRedis(credentials=credential.redisCredential,
    #     dest_key=redis_base + "/bluewater/notify", chunk_count=100), name="notifyRedis")
    messageProblem = formatted.as_json(name="castJson")
    streamsx.messagehub.publish(messageProblem, topic=topic['problem'], name="problemMH")

    return topology
def main():
    """Print Wikipedia recent-change events from a standalone topology.

    The source reads the Wikimedia EventStreams endpoint through
    ``SSEReader``. Events without data are dropped, the data of each
    remaining event is passed on as a string, and every string is
    printed. The topology is submitted to the ``STANDALONE`` context.
    """
    # See https://wikitech.wikimedia.org/wiki/Event_Platform/EventStreams
    wiki_url = 'https://stream.wikimedia.org/v2/stream/recentchange'

    topo = Topology("WikipediaSSE")
    topo.add_pip_package('sseclient')

    raw_events = topo.source(SSEReader(wiki_url), name='WikiRecentChanges')

    # Keep only the events that carry data.
    with_data = raw_events.filter(lambda event: event.data, name='NonEmptyEvents')

    # Pass on just the data, declared as a string stream.
    payloads = with_data.map(lambda event: event.data, schema=str, name='EventData')
    payloads.sink(print)

    streamsx.topology.context.submit("STANDALONE", topo)
def monitor(job_name, name_space, mh_topic, redis_base=None):
    """Declare the topology that forwards container temperatures to Redis.

    JSON tuples are subscribed from the Message Hub topic ``mh_topic``
    and mapped through ``AggTemp``. Every mapped tuple is sent to the
    Redis key ``<redis_base>/allRange``; those that pass
    ``OutOfRangeTemp`` are also sent to ``<redis_base>/outOfRange``.

    Args:
        job_name: Name given to the topology.
        name_space: Namespace given to the topology.
        mh_topic: Message Hub topic to subscribe to.
        redis_base: Prefix of the Redis destination keys. It defaults to
            ``None`` only for signature compatibility and must be given.

    Returns:
        The declared ``Topology``; it is not submitted here.

    Raises:
        TypeError: If ``redis_base`` is ``None``.
    """
    # Fail before any topology is declared. Previously the None default
    # surfaced later as an opaque "NoneType + str" TypeError.
    if redis_base is None:
        raise TypeError(
            "monitor() requires redis_base: it is the prefix of the "
            "Redis destination keys")

    out_of_range_key = redis_base + "/outOfRange"
    all_range_key = redis_base + "/allRange"

    topo = Topology(job_name, name_space)
    topo.add_pip_package('streamsx.messagehub')

    ranges = container_ranges(None)
    fromMh = streamsx.messagehub.subscribe(
        topo, schema=CommonSchema.Json, topic=mh_topic)

    aggTemp = fromMh.map(AggTemp(container_thresholds=ranges), name="aggTemp")
    filterRange = aggTemp.filter(
        OutOfRangeTemp(container_thresholds=ranges), name="rangeFilter")

    filterRange.sink(
        TransmitRedis(credentials=credential.redisCredential,
                      dest_key=out_of_range_key, chunk_count=10),
        name="rangeRedis")
    aggTemp.sink(
        TransmitRedis(credentials=credential.redisCredential,
                      dest_key=all_range_key, chunk_count=10),
        name="allRedis")

    return topo
def test_scikit_learn(self):
    """Verify the basic scikit-learn tutorial code works as a stream.

    The classifier is trained on all digit images but the last ten.
    Those ten are predicted locally for the expected values and then
    predicted again inside a topology map step; the tester compares the
    stream contents and the tuple count against the local results.
    """
    digits = datasets.load_digits()
    unseen_images = digits.data[-10:]

    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Locally computed predictions that the stream output must match.
    expected = [
        clf.predict(sample.reshape(1, -1))[0]
        for sample in unseen_images
    ]

    topo = Topology()
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    images = topo.source(unseen_images, name='Images')
    images_digits = images.map(
        lambda image: clf.predict(image.reshape(1, -1))[0],
        name='Predict Digit')

    tester = Tester(topo)
    tester.contents(images_digits, expected)
    tester.tuple_count(images_digits, 10)
    tester.test(self.test_ctxtype, self.test_config)
def main():
    """
    This is a variant of images.py that loads the model from a file.

    Here the Streams application is declared using a model
    contained in a file. This is a typical pattern where
    the model is created off-line and saved to a file.
    Subsequently applications load the file to perform predictions.

    Comments are mainly focused on the model loading, see
    images.py for details on other statements.

    The local model file is removed when this function finishes,
    whether or not declaring or submitting the topology succeeded.

    http://scikit-learn.org/stable/modules/model_persistence.html
    """
    model_file = 'digitmodel.pkl'

    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Persist the model as a file
    joblib.dump(clf, model_file)

    try:
        # Just to ensure we are not referencing the local
        # instance of the model, we will load the model at
        # runtime from the file.
        clf = None

        topo = Topology(namespace='ScikitLearn', name='ImagesModelFile')
        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

        # Add the model to the topology. This will take a copy
        # of the file and make it available when the job
        # is running. The returned path is relative to the
        # job's application directory. See DigitPredictor() for
        # how it is used.
        model_path = topo.add_file_dependency(model_file, 'etc')

        # Predict the digit from the image using the trained model.
        # The map method declares a stream (images_digits) that is
        # the result of applying a function to each tuple on its
        # input stream (images)
        #
        # At runtime we need to load the model from the file so instead
        # of a stateless lambda function we use an instance of a class.
        # This class (DigitPredictor) has the model path as its state
        # and will load the model from the file when the job is executing
        # in the IBM Cloud.
        images_digits = images.map(DigitPredictor(model_path), name='Predict Digit')

        images_digits.for_each(lambda x: None, name='Noop')

        # Note at this point topo represents the declaration of the
        # streaming application that predicts digits from images.
        # It must be submitted to an execution context, in this case
        # an instance of Streaming Analytics service running on IBM Cloud.
        sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
        print(sr)
    finally:
        # Clean up, the running job has its own copy of the model file.
        # Done in a finally block so that a failed declaration or
        # submission does not leave the file behind.
        os.remove(model_file)
def main():
    """
    This is a variant of images.py that loads the model from a file.

    Here the Streams application is declared using a model
    contained in a file. This is a typical pattern where
    the model is created off-line and saved to a file.
    Subsequently applications load the file to perform predictions.

    Comments are mainly focused on the model loading, see
    images.py for details on other statements.

    The local model file is always removed before returning, including
    when declaring or submitting the topology raises.

    http://scikit-learn.org/stable/modules/model_persistence.html
    """
    model_file = 'digitmodel.pkl'

    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Persist the model as a file
    joblib.dump(clf, model_file)

    try:
        # Just to ensure we are not referencing the local
        # instance of the model, we will load the model at
        # runtime from the file.
        clf = None

        topo = Topology(namespace='ScikitLearn', name='ImagesModelFile')
        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

        # Add the model to the topology. This will take a copy
        # of the file and make it available when the job
        # is running. The returned path is relative to the
        # job's application directory. See DigitPredictor() for
        # how it is used.
        model_path = topo.add_file_dependency(model_file, 'etc')

        # Predict the digit from the image using the trained model.
        # The map method declares a stream (images_digits) that is
        # the result of applying a function to each tuple on its
        # input stream (images)
        #
        # At runtime we need to load the model from the file so instead
        # of a stateless lambda function we use an instance of a class.
        # This class (DigitPredictor) has the model path as its state
        # and will load the model from the file when the job is executing
        # in the IBM Cloud.
        images_digits = images.map(DigitPredictor(model_path), name='Predict Digit')

        images_digits.for_each(lambda x: None, name='Noop')

        # Note at this point topo represents the declaration of the
        # streaming application that predicts digits from images.
        # It must be submitted to an execution context, in this case
        # an instance of Streaming Analytics service running on IBM Cloud.
        sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
        print(sr)
    finally:
        # Clean up, the running job has its own copy of the model file.
        # The finally block guarantees removal even if an earlier step
        # raised.
        os.remove(model_file)
def main():
    """
    Streaming version of the scikit-learn basic tutorial.

    In a streaming environment events arrive continually and one at a
    time, so the digit prediction example is adapted to predict a digit
    as each image arrives.

    The prediction model is trained locally on the example digits
    dataset. Predictions on the images happen at runtime in the IBM
    Cloud using the Streaming Analytics service.

    The original scikit-learn tutorial is at:
    http://scikit-learn.org/stable/tutorial/basic/tutorial.html
    """
    # Load the data and train the model on everything but the last ten images.
    digits = datasets.load_digits()
    train_images = digits.data[:-10]
    train_labels = digits.target[:-10]
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(train_images, train_labels)

    # Start the streaming application definition
    topo = Topology(namespace='ScikitLearn', name='Images')

    # scikit-learn must be required for use on the service
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    # The stream of images cycles endlessly through the last ten
    # images (the ones held back from training). Each tuple on
    # the stream is a single image.
    unseen_images = itertools.cycle(digits.data[-10:])
    images = topo.source(unseen_images, name='Images')

    # map declares a stream (images_digits) holding the result of
    # applying a function to each tuple of its input stream (images).
    #
    # The function here is a lambda that predicts the digit for an
    # image using the model clf. Each value it returns becomes a
    # tuple on images_digits: a dictionary containing the image
    # and the prediction.
    #
    # The lambda captures the model (clf), which will be pickled
    # (using dill) so it can be used on the service (which runs in
    # IBM Cloud).
    images_digits = images.map(
        lambda image: {
            'image': image,
            'digit': clf.predict(image.reshape(1, -1))[0],
        },
        name='Predict Digit')

    images_digits.for_each(lambda x: None, name='Noop')

    # At this point topo is only the declaration of the streaming
    # application that predicts digits from images. It must be
    # submitted to an execution context, in this case an instance
    # of Streaming Analytics service running on IBM Cloud.
    sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
    print(sr)
def main():
    """
    Introduce streaming with scikit-learn by adapting its basic tutorial.

    Events in a streaming environment arrive continually and as
    individual items. The digit prediction example is therefore
    adapted to predict a digit as each image arrives.

    Training of the prediction model happens locally with the example
    digits dataset; runtime prediction of images happens in the IBM
    Cloud using the Streaming Analytics service.

    The original scikit-learn tutorial is at:
    http://scikit-learn.org/stable/tutorial/basic/tutorial.html
    """
    # Load the data and train the model.
    digits = datasets.load_digits()
    features, labels = digits.data, digits.target
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(features[:-10], labels[:-10])

    # Start the streaming application definition
    topo = Topology(namespace='ScikitLearn', name='Images')

    # For use on the service we need to require scikit-learn
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    # Build the image stream by cycling through the last ten
    # images, which were excluded from the training. One tuple
    # on the stream is one image.
    held_out = features[-10:]
    images = topo.source(itertools.cycle(held_out), name='Images')

    # Predict the digit of each image with the trained model.
    # map declares a new stream (images_digits) made of the results
    # of applying a function to every tuple on the input stream
    # (images).
    #
    # Here that function is a lambda using the model clf. Whatever
    # it returns becomes a tuple on images_digits, in this case a
    # dictionary containing the image and the prediction.
    #
    # Note that the lambda captures clf, so the model will be
    # pickled (using dill) to make it usable on the service
    # (which runs in IBM Cloud).
    images_digits = images.map(
        lambda image: {
            'image': image,
            'digit': clf.predict(image.reshape(1, -1))[0],
        },
        name='Predict Digit',
    )

    images_digits.for_each(lambda x: None, name='Noop')

    # Note that topo so far only declares the streaming application
    # that predicts digits from images. To run, it must be submitted
    # to an execution context, in this case an instance of Streaming
    # Analytics service running on IBM Cloud.
    sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
    print(sr)