Exemplo n.º 1
0
    def test_scikit_learn(self):
        """Run the scikit-learn digits tutorial as a stream and verify it.

        An SVC is trained on every digit image except the last ten. The
        held-out images are predicted directly to build the expected
        values, then predicted again by a ``map`` on a topology stream.
        The tester checks that the stream carries those same ten values.
        """
        dataset = datasets.load_digits()
        held_out = dataset.data[-10:]

        model = svm.SVC(gamma=0.001, C=100.)
        model.fit(dataset.data[:-10], dataset.target[:-10])

        # Reference predictions, one image at a time, exactly as the
        # stream's map callable handles each tuple.
        expected = [model.predict(img.reshape(1, -1))[0] for img in held_out]

        topo = Topology()

        # Declare the scikit-learn pip package and add 'sklearn' to the
        # topology's excluded packages.
        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(held_out, name='Images')
        images_digits = images.map(
            lambda image: model.predict(image.reshape(1, -1))[0],
            name='Predict Digit')

        tester = Tester(topo)
        tester.contents(images_digits, expected)
        tester.tuple_count(images_digits, 10)
        tester.test(self.test_ctxtype, self.test_config)
Exemplo n.º 2
0
def monitor(job_name, name_space, redis_base=None, topic=None):
    """Declare the bluewater monitoring topology and return it.

    Ship and container messages are subscribed to as JSON, tagged with
    their origin, merged, consolidated and augmented with weather data.
    Three problem detectors (heatwave, unit down, fire) filter the
    resulting stream; their formatted notifications are merged, cast
    to JSON and published to the problem topic.

    Args:
        job_name: First argument passed to ``Topology``.
        name_space: Second argument passed to ``Topology``.
        redis_base: Currently unused; it is only referenced by the
            disabled Redis notification sink kept as a comment below.
        topic: Mapping with the keys ``'ship'``, ``'container'`` and
            ``'problem'`` naming the message hub topics. When ``None``
            the default bluewater topic names are used.

    Returns:
        The declared ``Topology``; it is not submitted here.

    Raises:
        KeyError: If a supplied ``topic`` mapping lacks a required key.
    """
    # Build the default per call rather than in the signature so no
    # mutable object is shared between invocations.
    if topic is None:
        topic = {
            'ship': 'bluewaterShip',
            'container': 'bluewaterContainer',
            'problem': 'bluewaterProblem',
        }

    topology = Topology(job_name, name_space)
    topology.add_pip_package('streamsx.messagehub')

    # Fetch and tag tuples so downstream stages know their origin.
    ship_mh = streamsx.messagehub.subscribe(topology,
                                            schema=CommonSchema.Json,
                                            topic=topic['ship'],
                                            name="shipMH")
    ship_mh = ship_mh.map(TagTuple("ship"), name="shipTag")
    container_mh = streamsx.messagehub.subscribe(topology,
                                                 schema=CommonSchema.Json,
                                                 topic=topic['container'],
                                                 name="containerMH")
    container_mh = container_mh.map(TagTuple("container"),
                                    name="containerTag")

    # Normalize the tuples.
    interleaved = ship_mh.union({container_mh})
    consolidated = interleaved.map(Consolidate(), name="consolidate")
    complete = consolidated.map(augment_weather, name="weatherAugment")

    complete.print(tag="complete")

    # Process the data: each detector filters the complete stream and
    # formats the tuples it lets through.
    heatwave_filtered = complete.filter(Heatwave(), name="heatwaveTest")
    format_heatwave_stream = heatwave_filtered.map(format_heatwave,
                                                   name="heatwaveFmt")
    unit_down_filtered = complete.filter(UnitDown(), name="downTest")
    format_down_stream = unit_down_filtered.map(format_unitDown,
                                                name="downFmt")
    fire_filtered = complete.filter(lambda t: t['tempC'] > 200.0,
                                    name="fireTest")
    format_fire_stream = fire_filtered.map(format_fire, name="fireFmt")

    # Consolidate notifications - redis + messagehub.
    formatted = format_fire_stream.union(
        {format_down_stream, format_heatwave_stream})
    # Disabled Redis notification sink (the only user of redis_base):
    # formatted.sink(TransmitRedis(credentials=credential.redisCredential,
    #                                  dest_key=redis_base + "/bluewater/notify", chunk_count=100), name="notifyRedis")
    message_problem = formatted.as_json(name="castJson")
    streamsx.messagehub.publish(message_problem,
                                topic=topic['problem'],
                                name="problemMH")

    return topology
Exemplo n.º 3
0
def main():
    """Print the data of Wikipedia recent-change events, run standalone.

    Declares the ``WikipediaSSE`` topology: an ``SSEReader`` source on
    the Wikimedia recent-change URL, a filter that keeps events with
    truthy ``data``, a map to that data with a ``str`` schema, and a
    sink that prints each item. The topology is then submitted to the
    ``STANDALONE`` context.
    """
    # See https://wikitech.wikimedia.org/wiki/Event_Platform/EventStreams
    wiki_url = 'https://stream.wikimedia.org/v2/stream/recentchange'

    topo = Topology("WikipediaSSE")
    topo.add_pip_package('sseclient')

    raw_events = topo.source(SSEReader(wiki_url), name='WikiRecentChanges')

    # Drop events that carry no data.
    events_with_data = raw_events.filter(lambda event: event.data,
                                         name='NonEmptyEvents')

    # Keep only the data, as strings.
    payloads = events_with_data.map(lambda event: event.data,
                                    schema=str,
                                    name='EventData')

    payloads.sink(print)

    streamsx.topology.context.submit("STANDALONE", topo)
Exemplo n.º 4
0
def monitor(job_name, name_space, mh_topic, redis_base=None):
    """Declare the container temperature range topology and return it.

    JSON messages from ``mh_topic`` are mapped through ``AggTemp``. The
    aggregated stream is written to Redis under
    ``redis_base + "/allRange"``, and the subset passing
    ``OutOfRangeTemp`` is written under ``redis_base + "/outOfRange"``.
    Both stages use the thresholds returned by
    ``container_ranges(None)``.

    Args:
        job_name: First argument passed to ``Topology``.
        name_space: Second argument passed to ``Topology``.
        mh_topic: Message hub topic to subscribe to.
        redis_base: Prefix for the Redis destination keys. Although it
            defaults to ``None`` it must be supplied, because both
            sinks build their key from it.

    Returns:
        The declared ``Topology``; it is not submitted here.

    Raises:
        TypeError: If ``redis_base`` is ``None``.
    """
    # Fail before declaring anything: concatenating None with a string
    # further down would raise the same exception type, but with an
    # opaque message and a half-built topology.
    if redis_base is None:
        raise TypeError(
            "monitor() requires redis_base (the Redis key prefix); "
            "got None")

    topo = Topology(job_name, name_space)
    topo.add_pip_package('streamsx.messagehub')

    ranges = container_ranges(None)
    from_mh = streamsx.messagehub.subscribe(topo,
                                            schema=CommonSchema.Json,
                                            topic=mh_topic)
    agg_temp = from_mh.map(AggTemp(container_thresholds=ranges),
                           name="aggTemp")
    filter_range = agg_temp.filter(
        OutOfRangeTemp(container_thresholds=ranges),
        name="rangeFilter")

    filter_range.sink(TransmitRedis(credentials=credential.redisCredential,
                                    dest_key=redis_base + "/outOfRange",
                                    chunk_count=10),
                      name="rangeRedis")
    agg_temp.sink(TransmitRedis(credentials=credential.redisCredential,
                                dest_key=redis_base + "/allRange",
                                chunk_count=10),
                  name="allRedis")

    return topo
Exemplo n.º 5
0
    def test_scikit_learn(self):
        """Check that streamed digit predictions match local predictions.

        Trains an SVC on all but the last ten digit images, predicts the
        remaining ten locally to obtain the expected values, and then
        declares a topology whose ``map`` performs the same prediction
        per tuple. The tester asserts both the contents and the count
        (ten) of the resulting stream.
        """
        digits = datasets.load_digits()
        classifier = svm.SVC(gamma=0.001, C=100.)
        classifier.fit(digits.data[:-10], digits.target[:-10])

        last_ten = digits.data[-10:]

        # Each image is reshaped to a single-row 2-D array before being
        # handed to predict, and the sole prediction is taken.
        expected = [
            classifier.predict(sample.reshape(1, -1))[0]
            for sample in last_ten
        ]

        topo = Topology()

        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(last_ten, name='Images')
        images_digits = images.map(
            lambda image: classifier.predict(image.reshape(1, -1))[0],
            name='Predict Digit')

        tester = Tester(topo)
        tester.contents(images_digits, expected)
        tester.tuple_count(images_digits, 10)
        tester.test(self.test_ctxtype, self.test_config)
Exemplo n.º 6
0
def main():
    """
    This is a variant of images.py that loads the model from a file.

    Here the Streams application is declared using a model
    contained in a file. This is a typical pattern where
    the model is created off-line and saved to a file.
    Subsequently applications load the file to perform predictions.

    Comments are mainly focused on the model loading, see
    images.py for details on other statements.

    The local model file is removed before returning, including when
    declaring or submitting the topology raises.

    http://scikit-learn.org/stable/modules/model_persistence.html
    """
    # Single definition of the model file name used for dumping,
    # registering the dependency and cleaning up.
    model_file = 'digitmodel.pkl'

    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Persist the model as a file
    joblib.dump(clf, model_file)

    # The try starts only after a successful dump so the finally
    # clause never tries to remove a file that was not written.
    try:
        # Just to ensure we are not referencing the local
        # instance of the model, we will load the model at
        # runtime from the file.
        clf = None

        topo = Topology(namespace='ScikitLearn', name='ImagesModelFile')

        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(itertools.cycle(digits.data[-10:]),
                             name='Images')

        # Add the model to the topology. This will take a copy
        # of the file and make it available when the job
        # is running. The returned path is relative to the
        # job's application directory. See DigitPredictor() for
        # how it is used.
        model_path = topo.add_file_dependency(model_file, 'etc')

        # Predict the digit from the image using the trained model.
        # The map method declares a stream (images_digits) that is
        # the result of applying a function to each tuple on its
        # input stream (images)
        #
        # At runtime we need to load the model from the file so instead
        # of a stateless lambda function we use an instance of a class.
        # This class (DigitPredictor) has the model path as its state
        # and will load the model from the file when the job is
        # executing in the IBM Cloud.
        images_digits = images.map(DigitPredictor(model_path),
                                   name='Predict Digit')

        images_digits.for_each(lambda x: None, name='Noop')

        # Note at this point topo represents the declaration of the
        # streaming application that predicts digits from images.
        # It must be submitted to an execution context, in this case
        # an instance of Streaming Analytics service running on
        # IBM Cloud.
        sr = streamsx.topology.context.submit(
            'STREAMING_ANALYTICS_SERVICE', topo)
        print(sr)
    finally:
        # Clean up, the running job has its own copy of the model file
        os.remove(model_file)
def main():
    """
    This is a variant of images.py that loads the model from a file.

    Here the Streams application is declared using a model
    contained in a file. This is a typical pattern where
    the model is created off-line and saved to a file.
    Subsequently applications load the file to perform predictions.

    Comments are mainly focused on the model loading, see
    images.py for details on other statements.

    The model file written locally is always deleted on the way out,
    even if building or submitting the topology fails.

    http://scikit-learn.org/stable/modules/model_persistence.html
    """
    # Name of the local model file; used for the dump, the file
    # dependency and the final removal.
    model_file = 'digitmodel.pkl'

    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Persist the model as a file
    joblib.dump(clf, model_file)

    # Everything after the dump is guarded so the file cannot be
    # left behind by an exception.
    try:
        # Just to ensure we are not referencing the local
        # instance of the model, we will load the model at
        # runtime from the file.
        clf = None

        topo = Topology(namespace='ScikitLearn', name='ImagesModelFile')

        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(itertools.cycle(digits.data[-10:]),
                             name='Images')

        # Add the model to the topology. This will take a copy
        # of the file and make it available when the job
        # is running. The returned path is relative to the
        # job's application directory. See DigitPredictor() for
        # how it is used.
        model_path = topo.add_file_dependency(model_file, 'etc')

        # Predict the digit from the image using the trained model.
        # The map method declares a stream (images_digits) that is
        # the result of applying a function to each tuple on its
        # input stream (images)
        #
        # At runtime we need to load the model from the file so instead
        # of a stateless lambda function we use an instance of a class.
        # This class (DigitPredictor) has the model path as its state
        # and will load the model from the file when the job is
        # executing in the IBM Cloud.
        images_digits = images.map(DigitPredictor(model_path),
                                   name='Predict Digit')

        images_digits.for_each(lambda x: None, name='Noop')

        # Note at this point topo represents the declaration of the
        # streaming application that predicts digits from images.
        # It must be submitted to an execution context, in this case
        # an instance of Streaming Analytics service running on
        # IBM Cloud.
        sr = streamsx.topology.context.submit(
            'STREAMING_ANALYTICS_SERVICE', topo)
        print(sr)
    finally:
        # Clean up, the running job has its own copy of the model file
        os.remove(model_file)
Exemplo n.º 8
0
def main():
    """
    Streaming adaptation of the scikit-learn basic tutorial.

    Events in a streaming environment arrive continually, one item
    at a time, so the digit prediction example is reshaped to
    predict a digit whenever an image arrives.

    The model is trained locally on the example digits dataset.
    Prediction happens at runtime in the IBM Cloud, using the
    Streaming Analytics service.

    The original scikit-learn tutorial is at:
    http://scikit-learn.org/stable/tutorial/basic/tutorial.html
    """
    # Train locally on everything except the last ten images.
    dataset = datasets.load_digits()
    model = svm.SVC(gamma=0.001, C=100.)
    model.fit(dataset.data[:-10], dataset.target[:-10])

    # Begin declaring the streaming application.
    topo = Topology(namespace='ScikitLearn', name='Images')

    # scikit-learn has to be required for use on the service.
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    # The source cycles endlessly over the ten images held out of
    # training; every tuple on the stream is one image.
    held_out_images = itertools.cycle(dataset.data[-10:])
    images = topo.source(held_out_images, name='Images')

    # map declares a new stream (images_digits) produced by calling
    # a function on each tuple of its input stream (images).
    #
    # The function here is a lambda that asks the model for the
    # digit shown in an image. Whatever the lambda returns becomes a
    # tuple on images_digits: a dictionary holding the image and
    # the predicted digit.
    #
    # The lambda captures the model, which is pickled (using dill)
    # along with it so it can be used on the service (which runs
    # in IBM Cloud).
    images_digits = images.map(
        lambda image: {
            'image': image,
            'digit': model.predict(image.reshape(1, -1))[0],
        },
        name='Predict Digit')

    images_digits.for_each(lambda _item: None, name='Noop')

    # topo now holds the declaration of the streaming application
    # that predicts digits from images. To run, it is submitted to
    # an execution context: here an instance of the Streaming
    # Analytics service running on IBM Cloud.
    submission = streamsx.topology.context.submit(
        'STREAMING_ANALYTICS_SERVICE', topo)
    print(submission)
Exemplo n.º 9
0
def main():
    """
    Predict digits from a stream of images with scikit-learn.

    This adapts the scikit-learn basic tutorial to a streaming
    environment, where events arrive continually and as individual
    items: a digit is predicted as each image arrives.

    Training of the prediction model is done locally with the
    example digits dataset; the runtime prediction of images is
    done in the IBM Cloud by the Streaming Analytics service.

    The original scikit-learn tutorial is at:
    http://scikit-learn.org/stable/tutorial/basic/tutorial.html
    """
    # Load the digits and fit the classifier, leaving the last ten
    # images out of the training set.
    digits = datasets.load_digits()
    classifier = svm.SVC(gamma=0.001, C=100.)
    classifier.fit(digits.data[:-10], digits.target[:-10])

    def predict(image):
        # Returns the dictionary that becomes a tuple on
        # images_digits: the image and the digit predicted for it.
        digit = classifier.predict(image.reshape(1, -1))[0]
        return {'image': image, 'digit': digit}

    # Streaming application definition starts here.
    topo = Topology(namespace='ScikitLearn', name='Images')

    # For use on the service scikit-learn must be required.
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    # Stream of images: an endless cycle through the last ten
    # images (the ones excluded from training), one image per tuple.
    images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

    # The map method declares a stream (images_digits) that results
    # from applying a function to each tuple on its input stream
    # (images). The wrapping lambda closes over predict and, through
    # it, the classifier; it is pickled (using dill) so that it can
    # be used on the service (which runs in IBM Cloud).
    images_digits = images.map(lambda image: predict(image),
                               name='Predict Digit')

    images_digits.for_each(lambda unused: None, name='Noop')

    # At this point topo is only the declaration of the streaming
    # application. It has to be submitted to an execution context,
    # in this case an instance of Streaming Analytics service
    # running on IBM Cloud.
    sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
    print(sr)