def test_get_job(self):
        topo = Topology("job_in_result_test")
        topo.source(["foo"])

        tester = Tester(topo)
        self.tester = tester

        tester.local_check = self._correct_job_ids
        tester.test(self.test_ctxtype, self.test_config)
    def test_get_job(self):
        topo = Topology("job_in_result_test")
        topo.source(["foo"])

        sc = rest.StreamsConnection(username=self.username, password=self.password)
        sc.session.verify = False
        config = {ConfigParams.STREAMS_CONNECTION : sc}

        tester = Tester(topo)
        self.tester = tester

        tester.local_check = self._correct_job_ids
        tester.test(self.test_ctxtype, config)
Example #3
def main():
    t = Topology("FFT_Sample")
    readings = t.source(signal_generator.Readings(50)).transform(TumblingWindow(10))
    fftStream = readings.transform(fftpack.fft)
    fftStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
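The signal samples here and below reference a signal_generator module and a TumblingWindow class that are not shown. A minimal sketch, assuming Readings is a callable source of random values and TumblingWindow is a stateful transform that buffers tuples and emits a list each time the window fills (returning None submits nothing):

# Hypothetical helpers for the signal samples (names and behavior
# assumed, not taken from the original modules).
import random

class Readings:
    def __init__(self, count):
        self.count = count

    def __call__(self):
        # topo.source() iterates over the returned generator.
        return (random.gauss(0.0, 1.0) for _ in range(self.count))

class TumblingWindow:
    def __init__(self, size):
        self.size = size
        self.buffer = []

    def __call__(self, tuple_):
        self.buffer.append(tuple_)
        if len(self.buffer) == self.size:
            window, self.buffer = self.buffer, []
            return window
        # Returning None from a transform callable submits no tuple.
        return None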
    def test_fn(self):
        topo = Topology()

        s = fn_ecruos(topo)
        self._csl_stream(s, 'source', 'fn_ecruos')

        s = fn_retlif(s)
        self._csl_stream(s, 'filter', 'fn_retlif')

        s = fn_pam(s)
        self._csl_stream(s, 'map', 'fn_pam')

        s = fn_pam_talf(s)
        self._csl_stream(s, 'flat_map', 'fn_pam_talf')
        
        s = fn_gnirts_sa(s)
        self._csl_stream(s, 'as_string', 'fn_gnirts_sa')

        s = fn_nosj_sa(s)
        self._csl_stream(s, 'as_json', 'fn_nosj_sa')

        st = fn_ebircsbus(topo)
        self._csl_stream(st, 'subscribe', 'fn_ebircsbus')

        e = fn_hcae_rof(s)
        self._csl_sink(e, 'for_each', 'fn_hcae_rof')

        e = fn_hsilbup(s)
        self._csl_sink(e, 'publish', 'fn_hsilbup')

        e = fn_hsilbup(topo.source([]), schema=CommonSchema.Json)
        self._csl_sink(e, 'publish', 'fn_hsilbup')

        e = fn_tnirp(s)
        self._csl_sink(e, 'print', 'fn_tnirp')
    def test_fetch_logs_on_failure(self):
        topo = Topology("fetch_logs_on_failure")
        s = topo.source(["foo"])

        tester = Tester(topo)
        # Causes test to fail
        tester.contents(s, ["bar"])

        try:
            self.tester = tester
            tester.local_check = self._can_retrieve_logs
            tester.test(self.test_ctxtype, self.test_config)
        except AssertionError:
            # This test is expected to fail, do nothing.
            pass

        # Check if logs were downloaded
        if self.can_retrieve_logs:
            logs = tester.result['application_logs']
            exists = os.path.isfile(logs)
            
            self.assertTrue(exists, "Application logs were not downloaded on test failure")
            
            if exists:
                os.remove(logs)
    def _test_submit_sab(self):
        topo = Topology('SabTest', namespace='mynamespace')
        s = topo.source([1,2])
        es = s.for_each(lambda x : None)
        bb = streamsx.topology.context.submit('BUNDLE', topo, {})
        self.assertIn('bundlePath', bb)
        self.assertIn('jobConfigPath', bb)

        sas = self.sc.get_streaming_analytics()

        sr = sas.submit_job(bundle=bb['bundlePath'])
        job_id = sr.get('id', sr.get('jobId'))
        self.assertIsNotNone(job_id)
        self.assertIn('name', sr)
        self.assertIn('application', sr)
        self.assertEqual('mynamespace::SabTest', sr['application'])
        cr = sas.cancel_job(job_id=job_id)

        jn = 'SABTEST:' + str(time.time())
        jc = streamsx.topology.context.JobConfig(job_name=jn)
        sr = sas.submit_job(bundle=bb['bundlePath'], job_config=jc)
        job_id = sr.get('id', sr.get('jobId'))
        self.assertIsNotNone(job_id)
        self.assertIn('application', sr)
        self.assertEqual('mynamespace::SabTest', sr['application'])
        self.assertIn('name', sr)
        self.assertEqual(jn, sr['name'])
        cr = sas.cancel_job(job_id=job_id)
       
        os.remove(bb['bundlePath'])
        os.remove(bb['jobConfigPath'])
Example #7
def main():
    """
    Sample echo topology application. This Python application builds a
    simple topology that echoes its command line arguments to standard output.

    The application implements the typical pattern
    of code that declares a topology followed by
    submission of the topology to a Streams context.
    
    Args:
        a list of values to print to stdout
        
    Example:
        python3 echo.py hello1 hello2 hello3
    Output:
        hello1
        hello2
        hello3
    """
    
    topo = Topology("echo")
    # The command line arguments (sys.argv) are captured by the SysArgv
    # callable class and will be used at runtime as the contents of the
    # echo stream.
    echo = topo.source(echo_functions.SysArgv(sys.argv[1:]))
    
    # print the echo stream to stdout
    echo.print()
    
    # At this point the topology is declared with a single
    # stream that is printed to stdout
    
    # execute the topology by submitting to a standalone context
    streamsx.topology.context.submit("STANDALONE", topo.graph)
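The echo_functions module is not shown. A minimal sketch of what SysArgv might look like, assuming it simply replays the captured argument list at runtime:

# Hypothetical sketch of echo_functions.SysArgv: a callable class that
# captures the argument list when the topology is declared and returns
# an iterable over it at runtime (one tuple per argument).
class SysArgv:
    def __init__(self, args):
        self.args = list(args)

    def __call__(self):
        return iter(self.args)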
Example #8
def main():
    """
    Sample filtering echo topology application. This Python application builds a
    simple topology that echoes its command line arguments to standard output.

    This demonstrates use of Python functional logic to filter the tuples.
    A user-defined function implements the filtering logic, in this
    case echoing only tuples that start with the letter `d`.

    Args:
        a list of values
        
    Example:
        python3 filter_echo.py cat dog mouse door
    Output:
        dog
        door
    """
    
    topo = Topology("filter_echo")
    source = topo.source(filter_echo_functions.SysArgv(sys.argv[1:]))
    
    # Declare a stream that will execute functional logic
    # against tuples on the echo stream.
    # For each tuple that will appear on echo, the below
    # `starts_with_d` method will be called.  If it returns
    # True then the tuple will appear on the filtered
    # stream, otherwise the tuple is discarded.
    filtered = source.filter(filter_echo_functions.starts_with_d)
    
    filtered.print()
    
    streamsx.topology.context.submit("STANDALONE", topo.graph)
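The filter_echo_functions module is not shown; its SysArgv source would have the same shape as in the echo sample above. A minimal sketch of the filtering function, assuming a simple prefix test:

# Hypothetical sketch of filter_echo_functions.starts_with_d:
# return True to keep the tuple on the filtered stream.
def starts_with_d(tuple_):
    return tuple_.startswith('d')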
def main():
    """
    Sample transform application.  This Python application builds a topology that
    * transforms a stream of string tuples from a source operator to a stream of integer tuples 
    * uses `transform` to perform addition on the integer tuples
    * prints the stream to stdout
    * submits the topology in standalone mode (compiles and executes it as a standalone application)
    
    Example:
        > python3 transform_sample.py
    Output:
        342
        474
        9342
    """
    
    # create the container for the topology that will hold the streams
    topo = Topology("transform_sample")
    
    # declare a source stream (`source`) that contains string tuples
    source = topo.source(transform_sample_functions.int_strings_transform)
    
    # transform the stream of string tuples (`source`) to a stream of integer tuples (`i1`)
    i1 = source.transform(transform_sample_functions.string_to_int)
    
    # adds 17 to each integer tuple 
    i2 = i1.transform(transform_sample_functions.AddNum(17))
    
    # terminate the stream by printing each tuple to stdout
    i2.print()
    
    # execute the application in standalone mode
    streamsx.topology.context.submit("STANDALONE", topo.graph)
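The transform_sample_functions helpers are not shown. A minimal sketch consistent with the documented output (342, 474, 9342 is 325, 457, 9325 plus 17), with the source values assumed:

# Hypothetical sketches of the transform_sample_functions helpers.
def int_strings_transform():
    # Source: numeric strings chosen to match the documented output.
    return ['325', '457', '9325']

def string_to_int(t):
    # Convert each string tuple to an integer tuple.
    return int(t)

class AddNum:
    # Stateful callable: adds a fixed increment to every tuple.
    def __init__(self, increment):
        self.increment = increment

    def __call__(self, t):
        return t + self.increment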
Example #10
def main():
    """
    Sample Hello World topology application. This Python application builds a
    simple topology that prints Hello World to standard output.

    The application implements the typical pattern
    of code that declares a topology followed by
    submission of the topology to a Streams context.
    
    This demonstrates the mechanics of declaring a topology and executing it.
            
    Example:
        python3 hello_world.py
    Output:
        Hello
        World!
    """
    
    # Create the container for the topology that will hold the streams of tuples.
    topo = Topology("hello_world")
    
    # Declare a source stream (hw) with string tuples containing two tuples,
    # "Hello" and "World!".
    hw = topo.source(hello_world_functions.source_tuples)
    
    # Sink hw by printing each of its tuples to standard output
    hw.print()
    
    # At this point the topology is declared with a single
    # stream that is printed to standard output
    
    # Now execute the topology by submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo.graph)
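The hello_world_functions module is not shown. A minimal sketch of source_tuples, which per the comments yields the two tuples "Hello" and "World!":

# Hypothetical sketch of hello_world_functions.source_tuples.
def source_tuples():
    return ["Hello", "World!"]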
Example #11
def main():
    """
    Finds outliers from a sequence of floats (e.g. simulating a sensor reading).
    Demonstrates function logic that maintains state across tuples.
    
    Example:
        python3 find_outliers.py
    Example Output:
        2.753064082105016
        -2.210758753960355
        1.9847958795117937
        2.661689193901883
        2.468061723082693
        ...
    """
    topo = Topology("find_outliers")
    
    # Produce a stream of random float values with a normal
    # distribution, mean 0.0 and standard deviation 1.
    values = topo.source(find_outliers_functions.readings)
    

    # Filters the values based on calculating the mean and standard
    # deviation from the incoming data. In this case only outliers are
    # present in the output stream outliers. An outlier is defined as 
    # more than (threshold * standard deviation) from the mean.  The
    # threshold in this example is 2.0.
    # This demonstrates a functional logic class that is
    # stateful. The threshold, sum_x, and sum_x_squared maintain 
    # their values across multiple invocations.
    outliers = values.filter(find_outliers_functions.IsOutlier(2.0))
    
    outliers.print()
    
    streamsx.topology.context.submit("STANDALONE", topo.graph)
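The find_outliers_functions helpers are not shown; readings would be an infinite generator of normally distributed floats (see the temperature sensor sample later). A minimal sketch of the stateful IsOutlier class described in the comments above, assuming a running mean and standard deviation over all values seen so far:

# Hypothetical sketch of find_outliers_functions.IsOutlier: a stateful
# filter callable whose sums persist across invocations.
import math

class IsOutlier:
    def __init__(self, threshold):
        self.threshold = threshold
        self.count = 0
        self.sum_x = 0.0
        self.sum_x_squared = 0.0

    def __call__(self, value):
        self.count += 1
        self.sum_x += value
        self.sum_x_squared += value * value
        mean = self.sum_x / self.count
        variance = self.sum_x_squared / self.count - mean * mean
        stddev = math.sqrt(max(variance, 0.0))
        # Keep only values more than (threshold * stddev) from the mean.
        return abs(value - mean) > self.threshold * stddev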
Example #12
def main():
    ref_signal = signal.hann(10)

    t = Topology("FFTConvolve_Sample")
    readings = t.source(signal_generator.Readings(10000)).transform(TumblingWindow(1000))
    convolveStream = readings.transform(signal_functions.FFTConvolve(ref_signal))
    convolveStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
Example #13
def main():
    ref_signal = signal.hann(10)

    t = Topology("Convolve_Sample")
    readings = t.source(signal_generator.Readings(100)).transform(TumblingWindow(20))
    convolveStream = readings.transform(signal_functions.Convolve(ref_signal))
    convolveStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
    def test_statement(self):
        print('\n---------' + str(self))
        name = 'test_statement'
        topo = Topology(name)
        streamsx.spl.toolkit.add_toolkit(topo, self.jdbc_toolkit_home)
        s = topo.source(['DROP TABLE STR_SAMPLE']).as_string()
        res_sql = s.map(es.SQLStatement(credentials='eventstore'), schema=CommonSchema.String)
        res_sql.print()

        self._build_only(name, topo)
    def test_param(self):
        topo = Topology()
        s = topo.source(['Hello World']).as_string()
        es.insert(s, connection='9.26.150.75:1101', database='sample_db', table='sample_table')
        es.insert(s, connection='9.26.150.75:1101', database='sample_db', table='sample_table', batch_size=100, max_num_active_batches=5)
        es.insert(s, connection='9.26.150.75:1101', database='sample_db', table='sample_table', batch_size=100, max_num_active_batches=5, front_end_connection_flag=True)
        es.insert(s, connection='9.26.150.75:1101', database='sample_db', table='sample_table', batch_size=100, max_num_active_batches=5, plugin_flag=True)
        es.insert(s, connection='9.26.150.75:1101', database='sample_db', table='sample_table', batch_size=100, max_num_active_batches=5, plugin_flag=False)
        es.insert(s, connection='9.26.150.75:1101', database='sample_db', table='sample_table', batch_size=100, max_num_active_batches=5, plugin_flag='false')
        es.insert(s, connection='9.26.150.75:1101', database='sample_db', table='sample_table', batch_size=100, max_num_active_batches=5, ssl_connection=False)
Example #16
    def test_sequence_period(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=67, period=0.1))
        E = U.SEQUENCE_SCHEMA.extend(StreamSchema('tuple<float64 d>'))

        s = s.map(_Delta(), schema=E)
        tester = Tester(topo)
        tester.tuple_check(s, lambda x: x['d'] > 0.08)
        tester.tuple_count(s, 67 - 1)
        tester.test(self.test_ctxtype, self.test_config)
Example #17
    def test_deduplicate(self):
        topo = Topology()
        s = topo.source([1, 2, 1, 4, 5, 2])
        s = s.map(lambda v: {'a': v}, schema='tuple<int32 a>')
        s = s.map(U.Deduplicate(count=3))
        s = s.map(lambda v: v['a'])

        tester = Tester(topo)
        tester.contents(s, [1, 2, 4, 5, 2])
        tester.test(self.test_ctxtype, self.test_config)
Example #18
    def test_bad_lib_param(self):
        creds_file = os.environ['DB2_CREDENTIALS']
        with open(creds_file) as data_file:
            credentials = json.load(data_file)
        topo = Topology()
        s = topo.source(['DROP TABLE STR_SAMPLE']).as_string()
        # expect ValueError because driver class is not default and jdbc_driver_lib is missing
        self.assertRaises(ValueError, db.run_statement, s, credentials, jdbc_driver_class='com.any.DBDriver')
        # expect ValueError because jdbc_driver_lib is not a valid file
        self.assertRaises(ValueError, db.run_statement, s, credentials, jdbc_driver_class='com.any.DBDriver', jdbc_driver_lib='_any_invalid_file_')
Example #19
def main():
    """
    The 'Estimator' model accepts a tuple with these elements: (type, X, y), where:
       'type':  't' (for training), 'd' (for data), '' (empty string, same as 'd')
       'X':     is the data
       'y':     is the actual class of the data (only used to train the model)
    """
    training_size = 100
    num_centers = 2
    num_features = 2

    t = Topology("Estimator_Sample")
    trainingStream = t.source(sklearn_sources.Blobs(iterations=training_size, isTraining=True, centers=num_centers, n_features=num_features))
    dataStream = t.source(sklearn_sources.Blobs(centers=num_centers, n_features=num_features))
    combinedStreams = trainingStream.union({dataStream})
    predictionStream = combinedStreams.transform(Estimator(training_size, KNeighborsClassifier()))
    predictionStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
Example #20
    def test_map(self):
        topo = Topology()

        s = topo.source(s_none)
        s.map(m_none)
        sr = s.map(m_int)
        self.assertEqual(CommonSchema.String, sr.oport.schema)
        sr = s.map(m_str)
        self.assertEqual(_normalize(SensorReading), sr.oport.schema)
        s.map(m_any)
        s.map(m_sensor)

        s = topo.source(s_int)
        s.map(m_none)
        s.map(m_int)
        self.assertRaises(TypeError, s.map, m_str)
        s.map(m_any)
        self.assertRaises(TypeError, s.map, m_sensor)

        s = topo.source(s_str)
        s.map(m_none)
        self.assertRaises(TypeError, s.map, m_int)
        s.map(m_str)
        s.map(m_any)
        self.assertRaises(TypeError, s.map, m_sensor)

        s = topo.source(s_any)
        s.map(m_none)
        s.map(m_int)
        s.map(m_str)
        s.map(m_any)
        s.map(m_sensor)

        s = topo.source(s_sensor)
        s.map(m_none)
        self.assertRaises(TypeError, s.map, m_int)
        self.assertRaises(TypeError, s.map, m_str)
        s.map(m_any)
        s.map(m_sensor)

        s = topo.source(s_p)
        s.map(m_none)
        self.assertRaises(TypeError, s.map, m_int)
        self.assertRaises(TypeError, s.map, m_str)
        s.map(m_any)
        self.assertRaises(TypeError, s.map, m_sensor)
        sr = s.map(m_p)
        self.assertEqual(CommonSchema.Python, sr.oport.schema)
        self.assertRaises(TypeError, s.map, m_s)

        # Ensure we maintain the hint as well as the schema
        sr.map(m_p)
        self.assertRaises(TypeError, sr.map, m_s)
        sr.map(m_p2s).map(m_s)

        s = topo.source(s_s)
        s.map(m_p)
        s.map(m_s)
Example #21
def main():
    filter_order = 4
    cutoffFreq = 100
    sampleRate = 1000

    t = Topology("LowpassFilter_Sample")
    readings = t.source(signal_generator.Readings(50000)).transform(TumblingWindow(2000))
    filterStream = readings.transform(butterworth.Lowpass(filter_order, cutoffFreq, sampleRate))
    filterStream.sink(print)

    streamsx.topology.context.submit("STANDALONE", t.graph)
Example #22
    def test_as_string(self):
        topo = Topology()

        s = topo.source(s_none)
        s = s.as_string()

        s.map(m_none)
        self.assertRaises(TypeError, s.map, m_int)
        s.map(m_str)
        s.map(m_any)
        self.assertRaises(TypeError, s.map, m_sensor)
Example #23
    def test_non_matching_output(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=4))
        matches, non_matches = R.Filter.matching(s,
                                                 filter='seq<2ul',
                                                 non_matching=True)

        tester = Tester(topo)
        tester.tuple_count(matches, 2)
        tester.tuple_count(non_matches, 2)
        tester.test(self.test_ctxtype, self.test_config)
Example #24
    def test_flat_map(self):
        topo = Topology()

        s = topo.source(s_none)
        s.flat_map(fm_none)
        s.flat_map(fm_int)
        sr = s.flat_map(fm_str)
        self.assertEqual(_normalize(SensorReading), sr.oport.schema)
        sr.flat_map(fm_sensor)
        s.flat_map(fm_any)
        s.flat_map(fm_sensor)

        s = topo.source(s_int)
        s.flat_map(fm_none)
        s.flat_map(fm_int)
        self.assertRaises(TypeError, s.flat_map, fm_str)
        s.flat_map(fm_any)
        self.assertRaises(TypeError, s.flat_map, fm_sensor)

        s = topo.source(s_str)
        s.flat_map(fm_none)
        self.assertRaises(TypeError, s.flat_map, fm_int)
        sr = s.flat_map(fm_str)
        self.assertEqual(_normalize(SensorReading), sr.oport.schema)
        sr.flat_map(fm_sensor)
        s.flat_map(fm_any)
        self.assertRaises(TypeError, s.flat_map, fm_sensor)

        s = topo.source(s_any)
        s.flat_map(fm_none)
        s.flat_map(fm_int)
        sr = s.flat_map(fm_str)
        self.assertEqual(_normalize(SensorReading), sr.oport.schema)
        sr.flat_map(fm_sensor)
        s.flat_map(fm_any)
        s.flat_map(fm_sensor)

        s = topo.source(s_sensor)
        s.flat_map(fm_none)
        self.assertRaises(TypeError, s.flat_map, fm_int)
        self.assertRaises(TypeError, s.flat_map, fm_str)
        s.flat_map(fm_any)
        s.flat_map(fm_sensor)

        s = topo.source(s_p)
        s.flat_map(fm_none)
        self.assertRaises(TypeError, s.flat_map, fm_int)
        self.assertRaises(TypeError, s.flat_map, fm_str)
        s.flat_map(fm_any)
        self.assertRaises(TypeError, s.flat_map, fm_sensor)
        s.flat_map(fm_p)
        self.assertRaises(TypeError, s.flat_map, fm_s)

        s = topo.source(s_s)
        s.flat_map(fm_p)
        s.flat_map(fm_s)
Example #25
def main():
    """
    Sample continuous (streaming) grep topology application. This Python application builds a
    simple topology that periodically polls a directory for files, reads each file and
    outputs lines that contain the search term.
    Thus as each file is added to the directory, the application will read
    it and output matching lines.
    
    Args:
        directory (string): a directory that contains files to process
        search_string (string): a search term
        
    Example:
        * Create a subdirectory "dir"
        * Create file1.txt in subdirectory "dir" with the following contents:
            file1 line1
            file1 line2
            file1 line3
        * Create file2.txt in subdirectory "dir" with the following contents:
            file2 line1
            file2 line2
            file2 line3
        * python3 grep.py dir line2
        
    Output:
        file1 line2
        file2 line2
    """
    
    if len(sys.argv) != 3:
        print("Usage: python3 grep.py <directory> <search_string>")
        return
    directory = sys.argv[1]
    term = sys.argv[2]
    topo = Topology("grep")
    
    # Declare a stream that will contain the contents of the files.
    # For each input file, DirectoryWatcher opens the file and reads its contents 
    # as a text file, producing a tuple for each line of the file. The tuple contains
    # the contents of the line, as a string.
    lines = topo.source(util_functions.DirectoryWatcher(directory))
    
    # Filter out non-matching lines. FilterLine is a callable class 
    # that will be executed for each tuple on lines, that is each line
    # read from a file.  Only lines that contain the string `term` will
    # be included in the output stream.
    matching = lines.filter(grep_functions.FilterLine(term))
    
    # print the matching lines to standard out
    matching.print()
    
    # execute the topology
    streamsx.topology.context.submit("STANDALONE", topo)
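The util_functions and grep_functions modules are not shown. A minimal sketch of FilterLine, whose state is the search term, assuming a simple substring test:

# Hypothetical sketch of grep_functions.FilterLine.
class FilterLine:
    def __init__(self, term):
        self.term = term

    def __call__(self, line):
        # Keep only lines containing the search term.
        return self.term in line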
Example #26
    def test_request_delete_url_in_input_stream_string_type(self):
        topo = Topology('test_request_delete_url_in_input_stream_string_type')

        s = topo.source(['http://httpbin.org/delete']).as_string()
        res_http = inet.request_delete(s, ssl_accept_all_certificates=True)
        res_http.print()
        tester = Tester(topo)
        tester.tuple_count(res_http, 1)
        tester.run_for(60)
        tester.test(self.test_ctxtype,
                    self.test_config,
                    always_collect_logs=True)
Example #27
    def test_transform_filter(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=5))
        fo = R.Functor.map(s,
                           StreamSchema('tuple<uint64 seq>'),
                           filter='seq>=2ul')
        r = fo.outputs[0]
        r.print()

        tester = Tester(topo)
        tester.tuple_count(r, 3)
        tester.test(self.test_ctxtype, self.test_config)
Example #28
    def test_request_post_url_in_input_stream_string_type(self):
        topo = Topology('test_request_post_url_in_input_stream_string_type')

        s = topo.source(['http://httpbin.org/post']).as_string()
        res_http = inet.request_post(s)
        res_http.print()
        tester = Tester(topo)
        tester.tuple_count(res_http, 1)
        tester.run_for(60)
        tester.test(self.test_ctxtype,
                    self.test_config,
                    always_collect_logs=True)
Example #29
    def test_transform_schema(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=10))
        A = U.SEQUENCE_SCHEMA.extend(StreamSchema('tuple<rstring a>'))
        fo = R.Functor.map(s, A)
        fo.a = fo.output(fo.outputs[0], '"string value"')
        r = fo.outputs[0]
        r.print()

        tester = Tester(topo)
        tester.tuple_count(r, 10)
        tester.test(self.test_ctxtype, self.test_config)
Example #30
    def test_read_write(self):
        topo = Topology()
        s = topo.source(range(13))
        sch = 'tuple<rstring a, int32 b>'
        s = s.map(lambda v: ('A' + str(v), v + 7), schema=sch)
        fn = os.path.join(self.dir, 'data.csv')
        s.for_each(files.CSVWriter(fn))

        tester = Tester(topo)
        tester.tuple_count(s, 13)
        tester.test(self.test_ctxtype, self.test_config)

        self.assertTrue(os.path.isfile(fn))

        topo = Topology()
        r = topo.source(files.CSVReader(schema=sch, file=fn))
        expected = [{'a': 'A' + str(v), 'b': v + 7} for v in range(13)]

        tester = Tester(topo)
        tester.contents(r, expected)
        tester.test(self.test_ctxtype, self.test_config)
Example #31
def main():
    """
    Sample continuous (streaming) grep topology application. This Python application builds a
    simple topology that periodically polls a directory for files, reads each file and
    outputs lines that contain the search term.
    Thus as each file is added to the directory, the application will read
    it and output matching lines.
    
    Args:
        directory (string): a directory that contains files to process
        search_string (string): a search term
        
    Example:
        * Create a subdirectory "dir"
        * Create file1.txt in subdirectory "dir" with the following contents:
            file1 line1
            file1 line2
            file1 line3
        * Create file2.txt in subdirectory "dir" with the following contents:
            file2 line1
            file2 line2
            file2 line3
        * python3 grep.py dir line2
        
    Output:
        file1 line2
        file2 line2
    """

    if len(sys.argv) != 3:
        print("Usage: python3 grep.py <directory> <search_string>")
        return
    directory = sys.argv[1]
    term = sys.argv[2]
    topo = Topology("grep")

    # Declare a stream that will contain the contents of the files.
    # For each input file, DirectoryWatcher opens the file and reads its contents
    # as a text file, producing a tuple for each line of the file. The tuple contains
    # the contents of the line, as a string.
    lines = topo.source(util_functions.DirectoryWatcher(directory))

    # Filter out non-matching lines. FilterLine is a callable class
    # that will be executed for each tuple on lines, that is each line
    # read from a file.  Only lines that contain the string `term` will
    # be included in the output stream.
    matching = lines.filter(grep_functions.FilterLine(term))

    # print the matching lines to standard out
    matching.print()

    # execute the topology
    streamsx.topology.context.submit("STANDALONE", topo.graph)
Example #32
    def test_score_bundle(self):
        print('\n---------' + str(self))

        field_mapping = [{
            'model_field': 'Sepal.Length',
            'is_mandatory': True,
            'tuple_field': 'sepal_length'
        }, {
            'model_field': 'Sepal.Width',
            'is_mandatory': True,
            'tuple_field': 'sepal_width'
        }, {
            'model_field': 'Petal.Length',
            'is_mandatory': True,
            'tuple_field': 'petal_length'
        }, {
            'model_field': 'Petal.Width',
            'is_mandatory': True,
            'tuple_field': 'petal_width'
        }]

        deployment_guid = deployment_env_var()  #'a0a04976-9a81-4748-bed0-079890f7c96c'
        space_guid = space_env_var()  #'062c92c1-765e-43b9-801a-629215a0e866'

        name = 'test_score_bundle'
        topo = Topology(name)
        source_stream = topo.source(TestSource())
        # stream of dicts is consumed by wml_online_scoring
        scorings, invalids = wml.wml_online_scoring(
            source_stream,
            deployment_guid,
            field_mapping,
            json.loads(cloud_creds_env_var()),  #wml_credentials,
            space_guid,
            #expected_load = 1000,
            queue_size=2000,
            threads_per_node=1,
            bundle_size=100)

        #print_stream = scorings.map(lambda t: print(str(t)))

        scorings.publish(topic="ScoredRecords")
        invalids.publish(topic="InvalidRecords")

        #res.print()
        if (("TestDistributed" in str(self))
                or ("TestStreamingAnalytics" in str(self))):
            self._launch(topo)
        else:
            # build only
            self._build_only(name, topo)
Example #33
    def test_request_get_fixed_url(self):
        topo = Topology('test_request_get_fixed_url')

        url_sample = 'http://httpbin.org/get'
        s = topo.source(['fixed-url-test']).as_string()
        res_http = inet.request_get(s, url_sample)
        res_http.print()
        tester = Tester(topo)
        tester.tuple_count(res_http, 1)
        tester.run_for(60)
        tester.test(self.test_ctxtype,
                    self.test_config,
                    always_collect_logs=True)
Example #34
    def test_filter_argcount(self):
        topo = Topology()
        s = topo.source([])
        self.assertRaises(TypeError, s.filter, a_0)
        self.assertRaises(TypeError, s.filter, A_0())
        s.filter(a_1)
        s.filter(A_1())
        s.filter(ao_1)
        s.filter(AO_1())
        self.assertRaises(TypeError, s.filter, a_2)
        self.assertRaises(TypeError, s.filter, A_2())
        s.filter(ao_2)
        s.filter(AO_2())
Example #35
    def test_flat_map_argcount(self):
        topo = Topology()
        s = topo.source([])
        self.assertRaises(TypeError, s.flat_map, a_0)
        self.assertRaises(TypeError, s.flat_map, A_0())
        s.flat_map(a_1)
        s.flat_map(A_1())
        s.flat_map(ao_1)
        s.flat_map(AO_1())
        self.assertRaises(TypeError, s.flat_map, a_2)
        self.assertRaises(TypeError, s.flat_map, A_2())
        s.flat_map(ao_2)
        s.flat_map(AO_2())
Example #36
    def test_split_argcount(self):
        topo = Topology()
        s = topo.source([])
        self.assertRaises(TypeError, s.split, 2, a_0)
        self.assertRaises(TypeError, s.split, 2, A_0())
        s.split(2, a_1)
        s.split(2, A_1())
        s.split(2, ao_1)
        s.split(2, AO_1())
        self.assertRaises(TypeError, s.split, 2, a_2)
        self.assertRaises(TypeError, s.split, 2, A_2())
        s.split(2, ao_2)
        s.split(2, AO_2())
Example #37
    def test_no_out_schema(self):
        print('\n---------' + str(self))
        name = 'test_no_out_schema'
        creds_file = os.environ['DB2_CREDENTIALS']
        with open(creds_file) as data_file:
            credentials = json.load(data_file)
        topo = Topology(name)
        s = topo.source(['DROP TABLE STR_SAMPLE']).as_string()

        res_sql = s.map(db.JDBCStatement(credentials))
        res_sql.print()

        self._build_only(name, topo)
Example #38
    def test_for_each_argcount(self):
        topo = Topology()
        s = topo.source([])
        self.assertRaises(TypeError, s.for_each, a_0)
        self.assertRaises(TypeError, s.for_each, A_0())
        s.for_each(a_1)
        s.for_each(A_1())
        s.for_each(ao_1)
        s.for_each(AO_1())
        self.assertRaises(TypeError, s.for_each, a_2)
        self.assertRaises(TypeError, s.for_each, A_2())
        s.for_each(ao_2)
        s.for_each(AO_2())
    def test_get_job(self):
        topo = Topology("job_in_result_test")
        topo.source(["foo"])

        tester = Tester(topo)
        self.tester = tester

        tester.local_check = self._correct_job_ids
        tester.test(self.test_ctxtype, self.test_config)

        sr = tester.submission_result
        self.assertIn('submitMetrics', sr)
        m = sr['submitMetrics']
        self.assertIn('buildArchiveSize', m)
        self.assertIn('buildArchiveUploadTime_ms', m)
        self.assertIn('totalBuildTime_ms', m)
        self.assertIn('jobSubmissionTime_ms', m)

        self.assertTrue(m['buildArchiveSize'] > 0)
        self.assertTrue(m['buildArchiveUploadTime_ms'] > 0)
        self.assertTrue(m['totalBuildTime_ms'] > 0)
        self.assertTrue(m['jobSubmissionTime_ms'] > 0)
Example #40
    def test_aggregate_argcount(self):
        topo = Topology()
        w = topo.source([]).last(1)
        self.assertRaises(TypeError, w.aggregate, a_0)
        self.assertRaises(TypeError, w.aggregate, A_0())
        w.aggregate(a_1)
        w.aggregate(A_1())
        w.aggregate(ao_1)
        w.aggregate(AO_1())
        self.assertRaises(TypeError, w.aggregate, a_2)
        self.assertRaises(TypeError, w.aggregate, A_2())
        w.aggregate(ao_2)
        w.aggregate(AO_2())
Example #41
    def test_aggregate(self):
        topo = Topology()

        w = topo.source(s_none).last()
        w.aggregate(agg_none)
        sr = w.aggregate(agg_int)
        self.assertEqual(CommonSchema.String, sr.oport.schema)
        sr = w.aggregate(agg_str)
        self.assertEqual(_normalize(SensorReading), sr.oport.schema)
        w.aggregate(agg_any)
        w.aggregate(agg_sensor)

        w = topo.source(s_int).last()
        w.aggregate(agg_none)
        w.aggregate(agg_int)
        #self.assertRaises(TypeError, w.aggregate, agg_str)
        w.aggregate(agg_any)
        #self.assertRaises(TypeError, w.aggregate, agg_sensor)

        w = topo.source(s_str).last()
        w.aggregate(agg_none)
        #self.assertRaises(TypeError, w.aggregate, agg_int)
        w.aggregate(agg_str)
        w.aggregate(agg_any)
        #self.assertRaises(TypeError, w.aggregate, agg_sensor)

        w = topo.source(s_any).last()
        w.aggregate(agg_none)
        w.aggregate(agg_int)
        w.aggregate(agg_str)
        w.aggregate(agg_any)
        w.aggregate(agg_sensor)

        w = topo.source(s_sensor).last()
        w.aggregate(agg_none)
        #self.assertRaises(TypeError, w.aggregate, agg_int)
        #self.assertRaises(TypeError, w.aggregate, agg_str)
        w.aggregate(agg_any)
        w.aggregate(agg_sensor)

        w = topo.source(s_p).last()
        w.aggregate(agg_none)
        #self.assertRaises(TypeError, w.aggregate, agg_int)
        #self.assertRaises(TypeError, w.aggregate, agg_str)
        w.aggregate(agg_any)
        #self.assertRaises(TypeError, w.aggregate, agg_sensor)
        sr = w.aggregate(agg_p)
        self.assertEqual(CommonSchema.Python, sr.oport.schema)
        #self.assertRaises(TypeError, w.aggregate, agg_s)

        w = topo.source(s_s).last()
        w.aggregate(agg_p)
        w.aggregate(agg_s)
Example #42
    def test_spray(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=2442))
        outs = []
        for so in U.spray(s, count=7):
            outs.append(
                so.map(lambda x: (x['seq'], x['ts']),
                       schema=U.SEQUENCE_SCHEMA))

        s = outs[0].union(set(outs))

        tester = Tester(topo)
        tester.tuple_count(s, 2442)
        tester.test(self.test_ctxtype, self.test_config)
Example #43
    def test_pair(self):
        topo = Topology()
        s = topo.source(U.Sequence(iterations=932))
        rschema = U.SEQUENCE_SCHEMA.extend(
            StreamSchema('tuple<float64 score>'))
        r0 = s.map(lambda t: (t['seq'], t['ts'], 1.0), schema=rschema)
        r1 = s.map(lambda t: (t['seq'], t['ts'], 2.0), schema=rschema)

        r = U.pair(r0, r1)

        tester = Tester(topo)
        tester.tuple_count(r, 932 * 2)
        tester.tuple_check(r, PairCheck())
        tester.test(self.test_ctxtype, self.test_config)
Example #44
    def test_all_hdsf_operators(self):
        hdfs_cfg_file = os.environ['HDFS_SITE_XML']
        # credentials is the path to the HDFS configuration file 'hdfs-site.xml'
        topo = Topology('test_all_hdsf_operators')

        if self.hdfs_toolkit_location is not None:
            tk.add_toolkit(topo, self.hdfs_toolkit_location)

        # creates an input stream
        fileSinkInputStream = topo.source(
            ['This line will be written into a HDFS file.']).as_string()

        # writes a line into an HDFS file (HDFS2FileSink)
        fileSinkResults = hdfs.write(fileSinkInputStream,
                                     credentials=hdfs_cfg_file,
                                     file='pytest1/sample4%FILENUM.txt')
        fileSinkResults.print(name='printFileSinkResults')

        # scans an HDFS directory and returns file names (HDFS2DirectoryScan)
        scannedFileNames = hdfs.scan(topo,
                                     credentials=hdfs_cfg_file,
                                     directory='pytest1',
                                     pattern='sample.*txt',
                                     init_delay=10)
        scannedFileNames.print(name='printScannedFileNames')

        # reads lines from an HDFS file (HDFS2FileSource)
        readLines = hdfs.read(scannedFileNames, credentials=hdfs_cfg_file)
        readLines.print(name='printReadLines')

        # copies files from HDFS into local disk "/tmp/" (HDFS2FileCopy)
        copyFileResults = hdfs.copy(scannedFileNames,
                                    credentials=hdfs_cfg_file,
                                    direction='copyToLocalFile',
                                    hdfsFile=None,
                                    hdfsFileAttrName='fileName',
                                    localFile='/tmp/')
        copyFileResults.print(name='printCopyFileResults')

        tester = Tester(topo)
        tester.tuple_count(readLines, 1, exact=False)
        # tester.run_for(80)

        cfg = {}
        job_config = streamsx.topology.context.JobConfig(tracing='info')
        job_config.add(cfg)
        cfg[streamsx.topology.context.ConfigParams.SSL_VERIFY] = False

        # Run the test
        tester.test(self.test_ctxtype, cfg, always_collect_logs=True)
    def test_spl_generation(self):
        name = 'test_spl_generation'
        topo = Topology(name)
        scriptname = os.path.basename(__file__)[:-3]
        service_documentation={
          'title': '__SERVICE__TITLE__',
          'description': '__SERVICE__DESC__',
        }
        endpoint_documentation = dict()
        endpoint_documentation['summary'] = '__ENDPOINT__SUMMARY__'
        endpoint_documentation['tags'] = ['__ENDPOINT__TAG1__', '__ENDPOINT__TAG2__']
        endpoint_documentation['description'] = '__ENDPOINT__DESC__'
        doc_attr = dict()
        descr = {'id': {'description': '__ENDPOINT__ATTR1__DESC__'}}
        doc_attr.update(descr)
        descr = {'num': {'description': '__ENDPOINT__ATTR2__DESC__'}}
        doc_attr.update(descr)
        endpoint_documentation['attributeDescriptions'] = doc_attr

        stream1 = topo.source(lambda : itertools.count()).as_string()
        stream1.for_each(EndpointSink(
            consuming_reads=True,
            service_documentation=service_documentation,
            endpoint_documentation=endpoint_documentation))

        submission_result = submit(ContextTypes.TOOLKIT, topo)
        # check generated SPL file
        splfile = submission_result['toolkitRoot']+'/'+scriptname+'/'+name+'.spl'
        with open(splfile, "r") as fileHandle:
            ep_annotation = [line.strip() for line in fileHandle if "@endpoint" in line]
        print(str(ep_annotation))
        self.assertTrue('__ENDPOINT__SUMMARY__' in str(ep_annotation), msg=ep_annotation)
        self.assertTrue('__ENDPOINT__TAG1__' in str(ep_annotation), msg=ep_annotation)
        self.assertTrue('__ENDPOINT__TAG2__' in str(ep_annotation), msg=ep_annotation)
        self.assertTrue('__ENDPOINT__DESC__' in str(ep_annotation), msg=ep_annotation)
        self.assertTrue('__ENDPOINT__ATTR1__DESC__' in str(ep_annotation), msg=ep_annotation)
        self.assertTrue('__ENDPOINT__ATTR2__DESC__' in str(ep_annotation), msg=ep_annotation)
        with open(splfile, "r") as fileHandle:
            service_annotation = [line.strip() for line in fileHandle if "@service" in line]
        print(str(service_annotation))
        self.assertTrue('__SERVICE__TITLE__' in str(service_annotation), msg=service_annotation)
        self.assertTrue('__SERVICE__DESC__' in str(service_annotation), msg=service_annotation)
        # test consumingReads operator parameter in generated SPL code
        with open(splfile, "r") as fileHandle:
            sink_invocation = [line.strip() for line in fileHandle if "spl.endpoint::EndpointSink" in line or 'param' in line or 'consumingReads' in line]
        print(str(sink_invocation))
        self.assertTrue('spl.endpoint::EndpointSink' in sink_invocation[-3]
                        and 'param' in sink_invocation[-2]
                        and 'consumingReads' in sink_invocation[-1] and 'true' in sink_invocation[-1],
                        msg=sink_invocation)
    def test_always_fetch_logs(self):
        topo = Topology("always_fetch_logs")
        s = topo.source(["foo"])

        tester = Tester(topo)
        tester.contents(s, ["foo"])

        tester.test(self.test_ctxtype, self.test_config, always_collect_logs=True)

        # Check if logs were downloaded
        logs = tester.result['application_logs']
        exists = os.path.isfile(logs)

        self.assertTrue(exists, "Application logs were not downloaded on test success")

        if exists:
            os.remove(logs)
    def test_always_fetch_logs(self):
        topo = Topology("always_fetch_logs")
        s = topo.source(["foo"])

        tester = Tester(topo)
        tester.contents(s, ["foo"])

        self.tester = tester
        tester.local_check = self._can_retrieve_logs
        tester.test(self.test_ctxtype, self.test_config, always_collect_logs=True)

        if self.can_retrieve_logs:
            # streams version is >= 4.2.4. Fetching logs is supported.
            # Check if logs were downloaded
            logs = tester.result['application_logs']
            exists = os.path.isfile(logs)

            self.assertTrue(exists, "Application logs were not downloaded on test success")
            
            if exists:
                os.remove(logs)                            
Example #48
def main():
    """
    Plays Fizz Buzz (https://en.wikipedia.org/wiki/Fizz_buzz)
    
    Example:
        python3 fizz_buzz.py
    Output:
        1
        2
        Fizz!
        4
        Buzz!
        Fizz!
        7
        8
        Fizz!
        Buzz!
        11
        Fizz!
        13
        14
        FizzBuzz!
        ...

    """
    topo = Topology("fizz_buzz")
    
    # Declare a stream of int values
    counting = topo.source(fizz_buzz_functions.int_tuples)
    
    # Print the tuples to standard output
    play_fizz_buzz(counting).print()
    
    # At this point the streaming topology (streaming) is
    # declared, but no data is flowing. The topology
    # must be submitted to a context to be executed.
    
    # execute the topology by submitting to a standalone context
    streamsx.topology.context.submit("STANDALONE", topo.graph)
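The Fizz Buzz helpers are not shown. A minimal sketch consistent with the documented output, assuming int_tuples counts up from 1 and play_fizz_buzz maps each number to its Fizz Buzz string:

# Hypothetical sketches of the Fizz Buzz helpers.
import itertools

def int_tuples():
    # Infinite stream of integers 1, 2, 3, ...
    return itertools.count(1)

def play_fizz_buzz(counting):
    def fizz_buzz(n):
        if n % 15 == 0:
            return 'FizzBuzz!'
        if n % 3 == 0:
            return 'Fizz!'
        if n % 5 == 0:
            return 'Buzz!'
        return str(n)
    # Declare a stream that applies the rules to each tuple.
    return counting.map(fizz_buzz)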
Example #49
    def test_scikit_learn(self):
        """Verify basic scikit-learn tutorial code works as a stream."""
        digits = datasets.load_digits()
        clf = svm.SVC(gamma=0.001, C=100.)
        clf.fit(digits.data[:-10], digits.target[:-10])

        expected = []
        for i in digits.data[-10:]:
            d = clf.predict(i.reshape(1,-1))
            expected.append(d[0])

        topo = Topology()

        topo.add_pip_package('scikit-learn')
        topo.exclude_packages.add('sklearn')

        images = topo.source(digits.data[-10:], name='Images')
        images_digits = images.map(lambda image : clf.predict(image.reshape(1,-1))[0], name='Predict Digit')

        tester = Tester(topo)
        tester.contents(images_digits, expected)
        tester.tuple_count(images_digits, 10)
        tester.test(self.test_ctxtype, self.test_config)
    def test_class(self):
        topo = Topology()
        ct = CallTopo()

        s = ct.ecruos(topo)
        self._csl_stream(s, 'source', 'ecruos', cls='CallTopo')

        s = ct.retlif(s)
        self._csl_stream(s, 'filter', 'retlif', cls='CallTopo')

        s = ct.pam(s)
        self._csl_stream(s, 'map', 'pam', cls='CallTopo')

        s = ct.pam_talf(s)
        self._csl_stream(s, 'flat_map', 'pam_talf', cls='CallTopo')
        
        s = ct.gnirts_sa(s)
        self._csl_stream(s, 'as_string', 'gnirts_sa', cls='CallTopo')

        s = ct.nosj_sa(s)
        self._csl_stream(s, 'as_json', 'nosj_sa', cls='CallTopo')

        st = ct.ebircsbus(topo)
        self._csl_stream(st, 'subscribe', 'ebircsbus', cls='CallTopo')

        e = ct.hcae_rof(s)
        self._csl_sink(e, 'for_each', 'hcae_rof', cls='CallTopo')

        e = ct.hsilbup(s)
        self._csl_sink(e, 'publish', 'hsilbup', cls='CallTopo')

        # test with implicit schema change
        e = ct.hsilbup(topo.source([]), schema=CommonSchema.Json)
        self._csl_sink(e, 'publish', 'hsilbup', cls='CallTopo')

        e = ct.tnirp(s)
        self._csl_sink(e, 'print', 'tnirp', cls='CallTopo')
Example #51
def main():
    """
    Sample temperature sensor topology application.  This Python application builds a 
    simple topology that prints an infinite stream of random numbers to standard
    output.

    The application implements the typical pattern
    of code that declares a topology followed by
    submission of the topology to a Streams context.
               
    Example:
        python3 temperature_sensor.py
    Output:
        ...
        0.3235259780332219
        1.7694181431337437
        0.27741668353194443
        -0.18827948813268522
        0.9576092897071428
        -0.8918033752738117
        -1.4946580133821907
        ...
        (Ctrl-C to exit)
    """
    
    # Create the container for the topology that will hold the streams of tuples.
    topo = Topology("temperature_sensor")
    
    # Declare an infinite stream of random numbers
    source = topo.source(temperature_sensor_functions.readings)
    
    # Sink the stream by printing each of its tuples to standard output
    source.print()
    
    # Now execute the topology by submitting to a standalone context.
    streamsx.topology.context.submit("STANDALONE", topo.graph)
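The temperature_sensor_functions module is not shown. A minimal sketch of readings as an infinite generator, matching the normally distributed output above:

# Hypothetical sketch of temperature_sensor_functions.readings.
import random

def readings():
    while True:
        yield random.gauss(0.0, 1.0)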
def main():
    """
    Sample continuous (streaming) regular expression grep topology application.
    This Python application builds a simple topology that periodically polls a 
    directory for files, reads each file and outputs lines that match a regular
    expression.
    The matching is done on a stream parallelized into 5 parallel channels.
    Tuples are routed to parallel channels such that an even distribution is
    maintained.
    
    Args:
        directory (string): a directory that contains files to process
        search_pattern (string): a search pattern
        
    Example:
        * In addition to including the `com.ibm.streamsx.topology/opt/python/packages`
          directory in the PYTHONPATH environment variable, also include the
          `samples/python/topology/simple` directory.
        * Create a subdirectory "dir"
        * Create file1.txt in subdirectory "dir" with the following contents:
            file1 line1
            file1 line2
            file1 line3
        * Create file2.txt in subdirectory "dir" with the following contents:
            file2 line1
            file2 line2
            file2 line3
        * python3 parallel_regex_grep.py dir line[1-2]
        
    Example Output (intermixed):
        file2 line1
        file2 line2
        file1 line1
        file1 line2
        
        LineCounter@139676451944432 has sent ...
        LineCounter@139676451944432 has sent 6 lines to be filtered.                   <== The source operator produced a total of 6 tuples
        
        1. FilterLine@139676451362072 has received 1 lines on this parallel channel.   <== 5 filter operators are created, one for each parallel channel.
        2. FilterLine@139676441656064 has received 1 lines on this parallel channel.       4 operators processed 1 tuple each.
        3. FilterLine@139676441211568 has received 1 lines on this parallel channel.       1 operator processed 2 tuples.
        4. FilterLine@139676441211848 has received 1 lines on this parallel channel.
        5. FilterLine@139676441655728 has received ...                                  
           FilterLine@139676441655728 has received 2 lines on this parallel channel.
           
    """
    if len(sys.argv) != 3:
        print("Usage: python3 parallel_regex_grep.py <directory> <search_pattern>")
        return
    directory = sys.argv[1]
    pattern = sys.argv[2]
    
    # Define the topology
    topo = Topology("parallel_regex_grep")
    
    # Declare a stream with tuples that are string objects
    # All files in a directory are read, resulting in lines of text
    # Each line is a tuple in the stream
    lines = topo.source(util_functions.DirectoryWatcher(directory))
    
    # Count the total number of lines before they are split between
    # different parallel channels.
    lines_counter = lines.transform(parallel_regex_grep_functions.LineCounter())

    # Parallelize the Stream.
    # Since there are 5 channels of the stream, the approximate number of
    # lines sent to each channel should be numSentStrings/5. This can be
    # verified by comparing the outputs of the lines_counter stream to that
    # of the parallel channels.
    lines_parallel = lines_counter.parallel(5)
    
    # Filter for the matched string, and print the number of strings that
    # have been tested. This happens in parallel.
    filtered_parallel = lines_parallel.filter(parallel_regex_grep_functions.FilterLine(pattern))
    
    # Join the results of each parallel filter into one stream,
    # merging the parallel streams back into one stream.
    filtered_condensed = filtered_parallel.end_parallel()
    
    # Print the combined results
    filtered_condensed.print()
    
    # Execute the topology
    streamsx.topology.context.submit("STANDALONE", topo.graph)
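The parallel_regex_grep_functions helpers are not shown. A minimal sketch of the two stateful callable classes, assuming they count the tuples they see and log progress in the format of the example output above (using id(self) for the object address):

# Hypothetical sketches of the parallel_regex_grep helpers.
import re

class LineCounter:
    def __init__(self):
        self.count = 0

    def __call__(self, line):
        self.count += 1
        print('LineCounter@{} has sent {} lines to be filtered.'.format(
            id(self), self.count))
        # Pass the line through unchanged.
        return line

class FilterLine:
    def __init__(self, pattern):
        self.pattern = re.compile(pattern)
        self.count = 0

    def __call__(self, line):
        self.count += 1
        print('FilterLine@{} has received {} lines on this parallel channel.'.format(
            id(self), self.count))
        return self.pattern.search(line) is not None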
def main():
    """
    This is a variant of images.py that loads the model from a file.

    Here the Streams application is declared using a model
    contained in a file. This is a typical pattern where
    the model is created off-line and saved to a file.
    Subsequently applications load the file to perform predictions.

    Comments are mainly focused on the model loading, see
    images.py for details on other statements.

    http://scikit-learn.org/stable/modules/model_persistence.html
    """
    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Persist the model as a file
    joblib.dump(clf, 'digitmodel.pkl')

    # Just to ensure we are not referencing the local
    # instance of the model, we will load the model at
    # runtime from the file.
    clf = None

    topo = Topology(namespace='ScikitLearn', name='ImagesModelFile')

    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

    # Add the model to the topology. This will take a copy
    # of the file and make it available when the job
    # is running. The returned path is relative to the
    # job's application directory. See DigitPredictor() for
    # how it is used.
    model_path = topo.add_file_dependency('digitmodel.pkl', 'etc')

    # Predict the digit from the image using the trained model.
    # The map method declares a stream (images_digits) that is
    # the result of applying a function to each tuple on its
    # input stream (images) 
    #
    # At runtime we need to load the model from the file, so instead
    # of a stateless lambda function we use an instance of a class.
    # This class (DigitPredictor) has the model path as its state
    # and will load the model from the file when the job is executing
    # in the IBM Cloud.
    images_digits = images.map(DigitPredictor(model_path), name='Predict Digit')

    images_digits.for_each(lambda x : None, name='Noop')

    # Note at this point topo represents the declaration of the
    # streaming application that predicts digits from images.
    # It must be submitted to an execution context, in this case
    # an instance of Streaming Analytics service running on IBM Cloud.

    sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
    print(sr)

    # Clean up, the running job has its own copy of the model file
    os.remove('digitmodel.pkl')
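DigitPredictor itself is not shown. A minimal sketch of a callable class whose state is the model path and that loads the model lazily at runtime, assuming streamsx.ec.get_application_directory() resolves the running job's application directory:

# Hypothetical sketch of DigitPredictor (loads the model at runtime).
import os
import joblib
import streamsx.ec

class DigitPredictor:
    def __init__(self, model_path):
        # Path relative to the job's application directory.
        self.model_path = model_path
        self.clf = None

    def __call__(self, image):
        if self.clf is None:
            # On first use, resolve the path inside the running job's
            # application directory and load the persisted model.
            path = os.path.join(streamsx.ec.get_application_directory(),
                                self.model_path)
            self.clf = joblib.load(path)
        return {'image': image,
                'digit': self.clf.predict(image.reshape(1, -1))[0]}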
Example #54
    def test_keep_schema_json(self):
        topo = Topology()
        s = topo.source([]).as_json()
        self._check_kept(s)
Example #55
    def test_keep_schema_string(self):
        topo = Topology()
        s = topo.source([]).as_string()
        self._check_kept(s)
def main():
    """
    Sample continuous (streaming) regular expression grep topology application.
    This Python application builds a simple topology that periodically polls a 
    directory for files, reads each file and outputs lines that match a regular
    expression.
    
    The matching is done on a stream parallelized into 5 parallel channels.
    Tuples will be consistently routed to the same channel based upon their
    hash value, computed with the built-in hash() function, so each channel
    of the parallel region receives only tuples that share the same hash value.
    
    For this sample, if you read from a file that contains the following:
    Apple
    Orange
    Banana
    Banana
    Apple
    Apple
    you will notice that the lines containing Apple will always be sent to the 
    same channel of the parallel region.  Similarly, lines containing Banana
    will be sent to the same channel, and lines containing Orange will be
    sent to the same channel.
    
    Args:
        directory (string): a directory that contains files to process
        search_pattern (string): a search pattern
        
    Example:
        * In addition to including the `com.ibm.streamsx.topology/opt/python/packages`
          directory in the PYTHONPATH environment variable, also include the
          `samples/python/topology/simple` directory.
        * Create a subdirectory "dir"
        * Create file3.txt in subdirectory "dir" with the following contents:
          Apple
          Orange
          Banana
          Banana
          Apple
          Apple
        * python3 partitioned_parallel_regex_grep.py dir 'Apple|Banana'
        
    Example Output (intermixed):
          Apple
          Apple
          Apple
          Banana
          Banana

                                                                                          3 filter operators are created:
          1. FilterLine@139865292777904 testing string "Apple" for the pattern.           <== 1 operator processes tuples containing "Apple"
             FilterLine@139865292777904 testing string "Apple" for the pattern.
             FilterLine@139865292777904 testing string "Apple" for the pattern.

          2. FilterLine@139865292777792 testing string "Orange" for the pattern.          <== 1 operator processes tuples containing "Orange"
          
          3. FilterLine@139865298606120 testing string "Banana" for the pattern.          <== 1 operator processes tuples containing "Banana"
             FilterLine@139865298606120 testing string "Banana" for the pattern.
    """
    if len(sys.argv) != 3:
        print("Usage: python3 partitioned_parallel_regex_grep.py <directory> <search_pattern>")
        return
    directory = sys.argv[1]
    pattern = sys.argv[2]
    
    # Define the topology
    topo = Topology("partitioned_parallel_regex_grep")
    
    # Declare a stream with tuples that are string objects
    # All files in a directory are read, resulting in lines of text
    # Each line is a tuple in the stream
    lines = topo.source(util_functions.DirectoryWatcher(directory))
    
    # Parallelize the stream into 5 parallel channels
    # The hash value of the tuple is used to route the tuple to a corresponding 
    # channel, so that all tuples with the same hash value are sent to the same
    # channel.
    lines_parallel = lines.parallel(5, Routing.HASH_PARTITIONED)
    
    # Filter for the matched string, and print the number of strings that
    # have been tested. This happens in parallel.
    filtered_parallel = lines_parallel.filter(partitioned_parallel_regex_grep_functions.FilterLine(pattern))
    
    # Join the results of each parallel filter into one stream,
    # merging the parallel streams back into one stream.
    filtered_condensed = filtered_parallel.end_parallel()
    
    # Print the combined results
    filtered_condensed.print()
    
    # Execute the topology
    streamsx.topology.context.submit("STANDALONE", topo)
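A minimal sketch of partitioned_parallel_regex_grep_functions.FilterLine, assuming it logs every string it tests in the format of the example output above (using id(self) for the object address):

# Hypothetical sketch of the partitioned grep filter.
import re

class FilterLine:
    def __init__(self, pattern):
        self.pattern = re.compile(pattern)

    def __call__(self, line):
        print('FilterLine@{} testing string "{}" for the pattern.'.format(
            id(self), line))
        return self.pattern.search(line) is not None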
Example #57
    def test_keep_schema_schema(self):
        topo = Topology()
        s = topo.source([]).map(lambda x : x, schema='tuple<rstring a, int32 b>')
        self._check_kept(s)
Example #58
def main():
    """
    Introduction to streaming with scikit-learn.

    Adapts the scikit-learn basic tutorial to
    a streaming environment.

    In a streaming environment events arrive continually
    and as individual items. In this case the digit prediction
    example is adapted to predict a digit as each image arrives.

    The training of the prediction model occurs locally using
    the example digits dataset, while the runtime prediction
    of images occurs in the IBM Cloud using the Streaming
    Analytics service.

    The original scikit-learn tutorial is at:
    http://scikit-learn.org/stable/tutorial/basic/tutorial.html 
    """
    # Load the data and train the model.
    digits = datasets.load_digits()
    clf = svm.SVC(gamma=0.001, C=100.)
    clf.fit(digits.data[:-10], digits.target[:-10])

    # Start the streaming application definition
    topo = Topology(namespace='ScikitLearn', name='Images')

    # For use on the service we need to require scikit-learn
    topo.add_pip_package('scikit-learn')
    topo.exclude_packages.add('sklearn')

    # Create a stream of images by cycling through the last
    # ten images (which were excluded from the training)
    # Each tuple on the stream represents a single image.
    images = topo.source(itertools.cycle(digits.data[-10:]), name='Images')

    # Predict the digit from the image using the trained model.
    # The map method declares a stream (images_digits) that is
    # the result of applying a function to each tuple on its
    # input stream (images) 
    #
    # In this case the function is a lambda that predicts the
    # digit for an image using the model clf. Each return
    # from the lambda becomes a tuple on images_digits,
    # in this case a dictionary containing the image and the prediction.
    #
    # Note that the lambda function captures the model (clf)
    # and it will be pickled (using dill) to allow it to
    # be used on the service (which runs in IBM Cloud).
    # 
    images_digits = images.map(lambda image : {'image':image, 'digit':clf.predict(image.reshape(1,-1))[0]}, name='Predict Digit')

    images_digits.for_each(lambda x : None, name='Noop')

    # Note at this point topo represents the declaration of the
    # streaming application that predicts digits from images.
    # It must be submitted to an execution context, in this case
    # an instance of Streaming Analytics service running on IBM Cloud.

    sr = streamsx.topology.context.submit('STREAMING_ANALYTICS_SERVICE', topo)
    print(sr)