class MultiStream(Topology):
    """Topology with one spout feeding a counting bolt and an aggregating bolt."""

    # Source component, declared with parallelism 2.
    spout = MultiStreamSpout.spec(par=2)

    # Reads the spout with a fields grouping on 'word';
    # TOPOLOGY_TICK_TUPLE_FREQ_SECS is set to 10 for this component.
    count_bolt = CountBolt.spec(
        par=2,
        inputs={spout: Grouping.fields('word')},
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10}
    )

    # Reads both the spout itself and its 'error' stream with the ALL grouping;
    # TOPOLOGY_TICK_TUPLE_FREQ_SECS is set to 15 for this component.
    stream_aggregator = StreamAggregateBolt.spec(
        par=1,
        inputs={
            spout: Grouping.ALL,
            spout['error']: Grouping.ALL
        },
        config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 15}
    )
def fields_grouping_builder(topology_name, http_server_url):
    """Assemble the fields-grouping test topology and return it.

    The topology is: "ab-spout" -> "count-bolt" (fields grouping on 'word',
    parallelism 2) -> "sum-bolt" (NONE grouping, parallelism 1).

    Args:
        topology_name: passed through to TestTopologyBuilder.
        http_server_url: passed through to TestTopologyBuilder.

    Returns:
        Whatever TestTopologyBuilder.create_topology() returns.
    """
    topo = TestTopologyBuilder(topology_name, http_server_url)

    # Source spout, capped at 400 executions.
    source = topo.add_spout("ab-spout", ABSpout, 1, max_executions=400)

    # Word counter, fed by the spout and grouped on the 'word' field.
    counter = topo.add_bolt(
        "count-bolt",
        WordCountBolt,
        inputs={source: Grouping.fields('word')},
        par=2
    )

    # Single aggregator that consumes the counters' output.
    topo.add_bolt(
        "sum-bolt",
        CountAggregatorBolt,
        inputs={counter: Grouping.NONE},
        par=1
    )

    return topo.create_topology()
def fields_grouping_builder(topology_name, http_server_url):
    """Assemble the fields-grouping test topology and return it.

    The topology is: "ab-spout" -> "count-bolt" (fields grouping on 'word',
    parallelism 2) -> "sum-bolt" (NONE grouping, parallelism 1).

    Args:
        topology_name: passed through to TestTopologyBuilder.
        http_server_url: passed through to TestTopologyBuilder.

    Returns:
        Whatever TestTopologyBuilder.create_topology() returns.
    """
    topo = TestTopologyBuilder(topology_name, http_server_url)

    # Source spout, capped at 400 executions.
    source = topo.add_spout("ab-spout", ABSpout, 1, max_executions=400)

    # Word counter, fed by the spout and grouped on the 'word' field.
    counter = topo.add_bolt(
        "count-bolt",
        WordCountBolt,
        inputs={source: Grouping.fields('word')},
        par=2
    )

    # Single aggregator that consumes the counters' output.
    topo.add_bolt(
        "sum-bolt",
        CountAggregatorBolt,
        inputs={counter: Grouping.NONE},
        par=1
    )

    return topo.create_topology()
def test_fields(self):
    """Check Grouping.fields() with valid input, non-string input and no input."""
    # Valid input: a list of names is kept as given, a bare string ends up
    # as a one-element list. Each pair is (argument, expected .fields value).
    valid_cases = (
        (['word', 'count'], ['word', 'count']),
        ("just_a_word", ['just_a_word']),
    )
    for given, expected_fields in valid_cases:
        grouping = Grouping.fields(given)
        self.assertEqual(grouping.gtype, topology_pb2.Grouping.Value("FIELDS"))
        self.assertEqual(grouping.fields, expected_fields)

    # Non-string input (including a list with a non-string member) is a TypeError.
    for not_strings in (['word', 'count', True], 123, None):
        with self.assertRaises(TypeError):
            Grouping.fields(not_strings)

    # Omitting the fields entirely is a ValueError.
    with self.assertRaises(ValueError):
        Grouping.fields()
def test_fields(self):
    """Check Grouping.fields() with valid input, non-string input and no input."""
    # Valid input: a list of names is kept as given, a bare string ends up
    # as a one-element list. Each pair is (argument, expected .fields value).
    valid_cases = (
        (['word', 'count'], ['word', 'count']),
        ("just_a_word", ['just_a_word']),
    )
    for given, expected_fields in valid_cases:
        grouping = Grouping.fields(given)
        self.assertEqual(grouping.gtype, topology_pb2.Grouping.Value("FIELDS"))
        self.assertEqual(grouping.fields, expected_fields)

    # Non-string input (including a list with a non-string member) is a TypeError.
    for not_strings in (['word', 'count', True], 123, None):
        with self.assertRaises(TypeError):
            Grouping.fields(not_strings)

    # Omitting the fields entirely is a ValueError.
    with self.assertRaises(ValueError):
        Grouping.fields()
def test_is_grouping_sane(self):
    """Check which grouping values Grouping.is_grouping_sane() accepts."""
    # The plain grouping constants are accepted as they are.
    for plain in (Grouping.ALL, Grouping.SHUFFLE, Grouping.LOWEST, Grouping.NONE):
        self.assertTrue(Grouping.is_grouping_sane(plain))

    # The bare FIELDS value is rejected; one built via fields() is accepted.
    self.assertFalse(Grouping.is_grouping_sane(Grouping.FIELDS))
    with_fields = Grouping.fields(['hello', 'world'])
    self.assertTrue(Grouping.is_grouping_sane(with_fields))

    # Likewise the bare CUSTOM value is rejected; one built via custom() is accepted.
    self.assertFalse(Grouping.is_grouping_sane(Grouping.CUSTOM))
    with_custom = Grouping.custom(DummyCustomGrouping())
    self.assertTrue(Grouping.is_grouping_sane(with_custom))
def test_is_grouping_sane(self):
    """Check which grouping values Grouping.is_grouping_sane() accepts."""
    # The plain grouping constants are accepted as they are.
    for plain in (Grouping.ALL, Grouping.SHUFFLE, Grouping.LOWEST, Grouping.NONE):
        self.assertTrue(Grouping.is_grouping_sane(plain))

    # The bare FIELDS value is rejected; one built via fields() is accepted.
    self.assertFalse(Grouping.is_grouping_sane(Grouping.FIELDS))
    with_fields = Grouping.fields(['hello', 'world'])
    self.assertTrue(Grouping.is_grouping_sane(with_fields))

    # Likewise the bare CUSTOM value is rejected; one built via custom() is accepted.
    self.assertFalse(Grouping.is_grouping_sane(Grouping.CUSTOM))
    with_custom = Grouping.custom(DummyCustomGrouping())
    self.assertTrue(Grouping.is_grouping_sane(with_custom))
def test_sanitize_inputs(self):
    """Exercise HeronComponentSpec._sanitize_inputs() for each accepted input form.

    Note that _sanitize_inputs() should only be called after the
    HeronComponentSpec's name attribute is set.
    """
    # Inputs of an unsupported type (valid ones are dict, list, tuple or None).
    for bad_inputs in ("string", 100):
        bad_spec = HeronComponentSpec("name", "classpath", True, 1, inputs=bad_inputs)
        with self.assertRaises(TypeError):
            bad_spec._sanitize_inputs()

    # dict <HeronComponentSpec -> Grouping>: keyed by the spec itself.
    upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
    downstream = HeronComponentSpec(
        "bolt", "bl_clspath", False, 1, inputs={upstream: Grouping.SHUFFLE})
    self.assertEqual(
        downstream._sanitize_inputs(),
        {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

    # dict <HeronComponentSpec -> Grouping>: keyed by a named output stream.
    upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
    upstream.outputs = [Stream(name='another_stream')]
    downstream = HeronComponentSpec(
        "bolt", "bl_clspath", False, 1,
        inputs={upstream['another_stream']: Grouping.ALL})
    self.assertEqual(
        downstream._sanitize_inputs(),
        {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

    # dict keyed by a spec whose name attribute is not set.
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    downstream = HeronComponentSpec(
        "bolt", "bl_clspath", False, 1, inputs={unnamed: Grouping.ALL})
    with self.assertRaises(RuntimeError):
        downstream._sanitize_inputs()

    # dict <GlobalStreamId -> Grouping>: returned equal to what was passed in.
    stream_groupings = {
        GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
        GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count'])
    }
    bolt_spec = HeronComponentSpec("bolt", "classpath", False, 1, inputs=stream_groupings)
    self.assertEqual(bolt_spec._sanitize_inputs(), stream_groupings)

    # list of HeronComponentSpec: each maps to its "default" stream with SHUFFLE.
    first_spout = HeronComponentSpec("spout1", "sp1_cls", True, 1)
    second_spout = HeronComponentSpec("spout2", "sp2_cls", True, 1)
    downstream = HeronComponentSpec(
        "bolt", "bl_cls", False, 1, inputs=[first_spout, second_spout])
    self.assertEqual(
        downstream._sanitize_inputs(),
        {
            GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
            GlobalStreamId("spout2", "default"): Grouping.SHUFFLE
        })

    # list containing a spec whose name attribute is not set.
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs=[unnamed])
    with self.assertRaises(RuntimeError):
        downstream._sanitize_inputs()

    # list of GlobalStreamId: each one is paired with SHUFFLE.
    stream_ids = [
        GlobalStreamId("spout1", "default"),
        GlobalStreamId("spout2", "some_stream")
    ]
    bolt_spec = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
    self.assertEqual(
        bolt_spec._sanitize_inputs(),
        dict(zip(stream_ids, [Grouping.SHUFFLE] * 2)))

    # list of neither GlobalStreamId nor HeronComponentSpec.
    junk_inputs = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    bolt_spec = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=junk_inputs)
    with self.assertRaises(ValueError):
        bolt_spec._sanitize_inputs()
'''Example WindowSizeTopology'''
import sys

import heronpy.api.api_constants as constants
from heronpy.api.topology import TopologyBuilder
from heronpy.api.stream import Grouping
from heronpy.api.bolt.window_bolt import SlidingWindowBolt
# NOTE(review): this import is rooted at `heron.examples` while the next one is
# rooted at `examples` -- confirm both package roots resolve in the build.
from heron.examples.src.python.spout import WordSpout
from examples.src.python.bolt import WindowSizeBolt

# Topology is defined using a topology builder
# Refer to multi_stream_topology for defining a topology by subclassing Topology
# pylint: disable=superfluous-parens
if __name__ == '__main__':
    # The topology name is the single required command-line argument.
    if len(sys.argv) != 2:
        # Function-call form of print: the bare print statement is a
        # SyntaxError on Python 3, while this form also runs on Python 2.
        print("Topology's name is not specified")
        sys.exit(1)

    builder = TopologyBuilder(name=sys.argv[1])

    word_spout = builder.add_spout("word_spout", WordSpout, par=2)

    # The bolt reads the spout with a fields grouping on 'word' and is
    # configured with a window duration of 10 and a slide interval of 2.
    count_bolt = builder.add_bolt("count_bolt", WindowSizeBolt, par=2,
                                  inputs={word_spout: Grouping.fields('word')},
                                  config={SlidingWindowBolt.WINDOW_DURATION_SECS: 10,
                                          SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS: 2})

    # Topology-wide setting: run in the ATLEAST_ONCE reliability mode.
    topology_config = {constants.TOPOLOGY_RELIABILITY_MODE:
                           constants.TopologyReliabilityMode.ATLEAST_ONCE}
    builder.set_config(topology_config)

    builder.build_and_submit()
import sys

import heronpy.api.api_constants as constants
from heronpy.api.topology import TopologyBuilder
from heronpy.api.stream import Grouping
from heronpy.api.bolt.window_bolt import SlidingWindowBolt
from heron.examples.src.python.spout import WordSpout
from examples.src.python.bolt import WindowSizeBolt

# This example builds its topology with a TopologyBuilder.
# See multi_stream_topology for the alternative of subclassing Topology.
# pylint: disable=superfluous-parens
if __name__ == '__main__':
    # Exactly one command-line argument is expected: the topology name.
    if len(sys.argv) != 2:
        print("Topology's name is not specified")
        sys.exit(1)

    builder = TopologyBuilder(name=sys.argv[1])

    # Word source, parallelism 2.
    word_spout = builder.add_spout("word_spout", WordSpout, par=2)

    # Windowed bolt fed by the spout, fields-grouped on 'word'.
    count_bolt = builder.add_bolt(
        "count_bolt",
        WindowSizeBolt,
        par=2,
        inputs={word_spout: Grouping.fields('word')},
        config={
            SlidingWindowBolt.WINDOW_DURATION_SECS: 10,
            SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS: 2
        }
    )

    # Topology-wide configuration: ATLEAST_ONCE reliability mode.
    reliability_mode = constants.TopologyReliabilityMode.ATLEAST_ONCE
    topology_config = {constants.TOPOLOGY_RELIABILITY_MODE: reliability_mode}
    builder.set_config(topology_config)

    builder.build_and_submit()
def test_sanitize_inputs(self):
    """Exercise HeronComponentSpec._sanitize_inputs() for each accepted input form.

    Note that _sanitize_inputs() should only be called after the
    HeronComponentSpec's name attribute is set.
    """
    # Inputs of an unsupported type (valid ones are dict, list, tuple or None).
    for bad_inputs in ("string", 100):
        bad_spec = HeronComponentSpec("name", "classpath", True, 1, inputs=bad_inputs)
        with self.assertRaises(TypeError):
            bad_spec._sanitize_inputs()

    # dict <HeronComponentSpec -> Grouping>: keyed by the spec itself.
    upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
    downstream = HeronComponentSpec(
        "bolt", "bl_clspath", False, 1, inputs={upstream: Grouping.SHUFFLE})
    self.assertEqual(
        downstream._sanitize_inputs(),
        {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

    # dict <HeronComponentSpec -> Grouping>: keyed by a named output stream.
    upstream = HeronComponentSpec("spout", "sp_clspath", True, 1)
    upstream.outputs = [Stream(name='another_stream')]
    downstream = HeronComponentSpec(
        "bolt", "bl_clspath", False, 1,
        inputs={upstream['another_stream']: Grouping.ALL})
    self.assertEqual(
        downstream._sanitize_inputs(),
        {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

    # dict keyed by a spec whose name attribute is not set.
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    downstream = HeronComponentSpec(
        "bolt", "bl_clspath", False, 1, inputs={unnamed: Grouping.ALL})
    with self.assertRaises(RuntimeError):
        downstream._sanitize_inputs()

    # dict <GlobalStreamId -> Grouping>: returned equal to what was passed in.
    stream_groupings = {
        GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
        GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count'])
    }
    bolt_spec = HeronComponentSpec("bolt", "classpath", False, 1, inputs=stream_groupings)
    self.assertEqual(bolt_spec._sanitize_inputs(), stream_groupings)

    # list of HeronComponentSpec: each maps to its "default" stream with SHUFFLE.
    first_spout = HeronComponentSpec("spout1", "sp1_cls", True, 1)
    second_spout = HeronComponentSpec("spout2", "sp2_cls", True, 1)
    downstream = HeronComponentSpec(
        "bolt", "bl_cls", False, 1, inputs=[first_spout, second_spout])
    self.assertEqual(
        downstream._sanitize_inputs(),
        {
            GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
            GlobalStreamId("spout2", "default"): Grouping.SHUFFLE
        })

    # list containing a spec whose name attribute is not set.
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    downstream = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs=[unnamed])
    with self.assertRaises(RuntimeError):
        downstream._sanitize_inputs()

    # list of GlobalStreamId: each one is paired with SHUFFLE.
    stream_ids = [
        GlobalStreamId("spout1", "default"),
        GlobalStreamId("spout2", "some_stream")
    ]
    bolt_spec = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
    self.assertEqual(
        bolt_spec._sanitize_inputs(),
        dict(zip(stream_ids, [Grouping.SHUFFLE] * 2)))

    # list of neither GlobalStreamId nor HeronComponentSpec.
    junk_inputs = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    bolt_spec = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=junk_inputs)
    with self.assertRaises(ValueError):
        bolt_spec._sanitize_inputs()