Пример #1
0
class MultiStream(Topology):
  """Topology with one spout feeding a counting bolt and an aggregating bolt.

  ``count_bolt`` subscribes to the spout with a fields grouping on ``word``.
  ``stream_aggregator`` subscribes to the spout itself and to its ``error``
  stream, both with the ALL grouping. Each bolt sets its own tick-tuple
  frequency through its component config.
  """
  spout = MultiStreamSpout.spec(par=2)

  count_bolt = CountBolt.spec(
      par=2,
      inputs={spout: Grouping.fields('word')},
      config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})

  stream_aggregator = StreamAggregateBolt.spec(
      par=1,
      inputs={
          spout: Grouping.ALL,
          spout['error']: Grouping.ALL,
      },
      config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 15})
Пример #2
0
def fields_grouping_builder(topology_name, http_server_url):
  """Assemble the fields-grouping test topology and return it.

  The topology has three components: an ``ab-spout`` (capped at 400
  executions), a ``count-bolt`` with parallelism 2 that receives the spout's
  tuples via a fields grouping on ``word``, and a single ``sum-bolt`` that
  receives the count bolt's output with the NONE grouping.

  Args:
    topology_name: name handed to TestTopologyBuilder.
    http_server_url: URL handed to TestTopologyBuilder.

  Returns:
    Whatever ``TestTopologyBuilder.create_topology()`` returns.
  """
  topology = TestTopologyBuilder(topology_name, http_server_url)

  source = topology.add_spout("ab-spout", ABSpout, 1, max_executions=400)

  counter_inputs = {source: Grouping.fields('word')}
  counter = topology.add_bolt("count-bolt", WordCountBolt, inputs=counter_inputs, par=2)

  aggregator_inputs = {counter: Grouping.NONE}
  topology.add_bolt("sum-bolt", CountAggregatorBolt, inputs=aggregator_inputs, par=1)

  return topology.create_topology()
Пример #3
0
def fields_grouping_builder(topology_name, http_server_url):
  """Build and return the test topology that exercises fields grouping.

  Wiring: ``ab-spout`` (max 400 executions) -> ``count-bolt`` (parallelism 2,
  fields grouping on ``word``) -> ``sum-bolt`` (parallelism 1, NONE grouping).

  Args:
    topology_name: name passed through to TestTopologyBuilder.
    http_server_url: URL passed through to TestTopologyBuilder.

  Returns:
    The result of ``TestTopologyBuilder.create_topology()``.
  """
  test_builder = TestTopologyBuilder(topology_name, http_server_url)

  spout_spec = test_builder.add_spout("ab-spout", ABSpout, 1, max_executions=400)

  word_counter = test_builder.add_bolt(
      "count-bolt", WordCountBolt,
      inputs={spout_spec: Grouping.fields('word')},
      par=2)

  test_builder.add_bolt(
      "sum-bolt", CountAggregatorBolt,
      inputs={word_counter: Grouping.NONE},
      par=1)

  return test_builder.create_topology()
Пример #4
0
    def test_fields(self):
        # Grouping.fields() takes either a list of field names or a single name;
        # in both cases the result carries the FIELDS gtype and a list of names.
        accepted = (
            (['word', 'count'], ['word', 'count']),
            ("just_a_word", ['just_a_word']),
        )
        for argument, expected_fields in accepted:
            grouping = Grouping.fields(argument)
            self.assertEqual(grouping.gtype,
                             topology_pb2.Grouping.Value("FIELDS"))
            self.assertEqual(grouping.fields, expected_fields)

        # a non-string argument, or a list containing a non-string, is a TypeError
        for rejected in (['word', 'count', True], 123, None):
            with self.assertRaises(TypeError):
                Grouping.fields(rejected)

        # calling with no fields at all is a ValueError
        self.assertRaises(ValueError, Grouping.fields)
Пример #5
0
  def test_fields(self):
    # Grouping.fields() takes either a list of field names or a single name;
    # in both cases the result carries the FIELDS gtype and a list of names.
    accepted = (
        (['word', 'count'], ['word', 'count']),
        ("just_a_word", ['just_a_word']),
    )
    for argument, expected_fields in accepted:
      grouping = Grouping.fields(argument)
      self.assertEqual(grouping.gtype, topology_pb2.Grouping.Value("FIELDS"))
      self.assertEqual(grouping.fields, expected_fields)

    # a non-string argument, or a list containing a non-string, is a TypeError
    for rejected in (['word', 'count', True], 123, None):
      with self.assertRaises(TypeError):
        Grouping.fields(rejected)

    # calling with no fields at all is a ValueError
    self.assertRaises(ValueError, Grouping.fields)
Пример #6
0
    def test_is_grouping_sane(self):
        # groupings that take no arguments are expected to be sane as they are
        for plain in (Grouping.ALL, Grouping.SHUFFLE, Grouping.LOWEST, Grouping.NONE):
            self.assertTrue(Grouping.is_grouping_sane(plain))

        # the bare FIELDS marker is not sane; one built by Grouping.fields() is
        self.assertFalse(Grouping.is_grouping_sane(Grouping.FIELDS))
        self.assertTrue(
            Grouping.is_grouping_sane(Grouping.fields(['hello', 'world'])))

        # likewise for CUSTOM versus a grouping built by Grouping.custom()
        self.assertFalse(Grouping.is_grouping_sane(Grouping.CUSTOM))
        self.assertTrue(
            Grouping.is_grouping_sane(Grouping.custom(DummyCustomGrouping())))
Пример #7
0
  def test_is_grouping_sane(self):
    # groupings that take no arguments are expected to be sane as they are
    for plain in (Grouping.ALL, Grouping.SHUFFLE, Grouping.LOWEST, Grouping.NONE):
      self.assertTrue(Grouping.is_grouping_sane(plain))

    # the bare FIELDS marker is not sane; one built by Grouping.fields() is
    self.assertFalse(Grouping.is_grouping_sane(Grouping.FIELDS))
    self.assertTrue(Grouping.is_grouping_sane(Grouping.fields(['hello', 'world'])))

    # likewise for CUSTOM versus a grouping built by Grouping.custom()
    self.assertFalse(Grouping.is_grouping_sane(Grouping.CUSTOM))
    self.assertTrue(Grouping.is_grouping_sane(Grouping.custom(DummyCustomGrouping())))
Пример #8
0
    def test_sanitize_inputs(self):
        # Note that _sanitize_inputs() should only be called after
        # HeronComponentSpec's name attribute is set.

        # inputs of an unsupported type (valid ones are dict, list, tuple or None)
        for bad_inputs in ("string", 100):
            bad_spec = HeronComponentSpec("name", "classpath", True, 1,
                                          inputs=bad_inputs)
            with self.assertRaises(TypeError):
                bad_spec._sanitize_inputs()

        # dict <HeronComponentSpec -> Grouping>: keyed by the spec itself
        spout = HeronComponentSpec("spout", "sp_clspath", True, 1)
        bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={spout: Grouping.SHUFFLE})
        sanitized = bolt._sanitize_inputs()
        expected = {GlobalStreamId("spout", "default"): Grouping.SHUFFLE}
        self.assertEqual(sanitized, expected)

        # dict <HeronComponentSpec -> Grouping>: keyed by a named output stream
        spout = HeronComponentSpec("spout", "sp_clspath", True, 1)
        spout.outputs = [Stream(name='another_stream')]
        bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={spout['another_stream']: Grouping.ALL})
        sanitized = bolt._sanitize_inputs()
        expected = {GlobalStreamId("spout", "another_stream"): Grouping.ALL}
        self.assertEqual(sanitized, expected)

        # dict keyed by a HeronComponentSpec whose name attribute is not set
        unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
        bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs={unnamed: Grouping.ALL})
        self.assertRaises(RuntimeError, bolt._sanitize_inputs)

        # dict <GlobalStreamId -> Grouping>: result equals what was passed in
        stream_groupings = {
            GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
            GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count']),
        }
        bolt = HeronComponentSpec("bolt", "classpath", False, 1,
                                  inputs=stream_groupings)
        self.assertEqual(bolt._sanitize_inputs(), stream_groupings)

        # list of HeronComponentSpec: each maps to its default stream with SHUFFLE
        first_spout = HeronComponentSpec("spout1", "sp1_cls", True, 1)
        second_spout = HeronComponentSpec("spout2", "sp2_cls", True, 1)
        bolt = HeronComponentSpec("bolt", "bl_cls", False, 1,
                                  inputs=[first_spout, second_spout])
        sanitized = bolt._sanitize_inputs()
        expected = {GlobalStreamId(spout_name, "default"): Grouping.SHUFFLE
                    for spout_name in ("spout1", "spout2")}
        self.assertEqual(sanitized, expected)

        # list containing a HeronComponentSpec whose name attribute is not set
        unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
        bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                  inputs=[unnamed])
        self.assertRaises(RuntimeError, bolt._sanitize_inputs)

        # list of GlobalStreamId: each maps to SHUFFLE
        stream_ids = [
            GlobalStreamId("spout1", "default"),
            GlobalStreamId("spout2", "some_stream"),
        ]
        bolt = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
        sanitized = bolt._sanitize_inputs()
        expected = {stream_id: Grouping.SHUFFLE for stream_id in stream_ids}
        self.assertEqual(sanitized, expected)

        # list of neither GlobalStreamId nor HeronComponentSpec
        junk = [None, 123, "string", [GlobalStreamId("sp", "default")]]
        bolt = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=junk)
        self.assertRaises(ValueError, bolt._sanitize_inputs)
'''Example WindowSizeTopology'''
import sys

import heronpy.api.api_constants as constants
from heronpy.api.topology import TopologyBuilder
from heronpy.api.stream import Grouping
from heronpy.api.bolt.window_bolt import SlidingWindowBolt
from heron.examples.src.python.spout import WordSpout
from examples.src.python.bolt import WindowSizeBolt

# Topology is defined using a topology builder
# Refer to multi_stream_topology for defining a topology by subclassing Topology
# pylint: disable=superfluous-parens
if __name__ == '__main__':
  # Exactly one command-line argument is expected: the topology name.
  if len(sys.argv) != 2:
    # The bare `print "..."` statement is a SyntaxError on Python 3; the
    # parenthesized single-argument form prints the same text on Python 2 and 3.
    print("Topology's name is not specified")
    sys.exit(1)

  builder = TopologyBuilder(name=sys.argv[1])

  # The spout's output is routed to the window bolt with a fields grouping on 'word'.
  word_spout = builder.add_spout("word_spout", WordSpout, par=2)
  # The bolt's window is set through its component config: duration 10 and
  # slide interval 2 (seconds, going by the config key names).
  count_bolt = builder.add_bolt("count_bolt", WindowSizeBolt, par=2,
                                inputs={word_spout: Grouping.fields('word')},
                                config={SlidingWindowBolt.WINDOW_DURATION_SECS: 10,
                                        SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS: 2})

  # Topology-wide configuration: request the ATLEAST_ONCE reliability mode.
  topology_config = {constants.TOPOLOGY_RELIABILITY_MODE:
                         constants.TopologyReliabilityMode.ATLEAST_ONCE}
  builder.set_config(topology_config)

  builder.build_and_submit()
Пример #10
0
import sys

import heronpy.api.api_constants as constants
from heronpy.api.topology import TopologyBuilder
from heronpy.api.stream import Grouping
from heronpy.api.bolt.window_bolt import SlidingWindowBolt
from heron.examples.src.python.spout import WordSpout
from examples.src.python.bolt import WindowSizeBolt

# This topology is assembled with a TopologyBuilder; see multi_stream_topology
# for the alternative of defining a topology by subclassing Topology.
# pylint: disable=superfluous-parens
if __name__ == '__main__':
  # The topology name must be the one and only command-line argument.
  if len(sys.argv) != 2:
    print("Topology's name is not specified")
    sys.exit(1)
  topology_name = sys.argv[1]

  builder = TopologyBuilder(name=topology_name)

  word_spout = builder.add_spout("word_spout", WordSpout, par=2)

  # The window bolt receives the spout's tuples via a fields grouping on 'word'.
  window_inputs = {word_spout: Grouping.fields('word')}
  window_config = {
      SlidingWindowBolt.WINDOW_DURATION_SECS: 10,
      SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS: 2,
  }
  count_bolt = builder.add_bolt("count_bolt", WindowSizeBolt, par=2,
                                inputs=window_inputs,
                                config=window_config)

  # Topology-wide configuration: request the ATLEAST_ONCE reliability mode.
  topology_config = {
      constants.TOPOLOGY_RELIABILITY_MODE: constants.TopologyReliabilityMode.ATLEAST_ONCE,
  }
  builder.set_config(topology_config)

  builder.build_and_submit()
Пример #11
0
  def test_sanitize_inputs(self):
    # Note that _sanitize_inputs() should only be called after
    # HeronComponentSpec's name attribute is set.

    # inputs of an unsupported type (valid ones are dict, list, tuple or None)
    for bad_inputs in ("string", 100):
      bad_spec = HeronComponentSpec("name", "classpath", True, 1, inputs=bad_inputs)
      with self.assertRaises(TypeError):
        bad_spec._sanitize_inputs()

    # dict <HeronComponentSpec -> Grouping>: keyed by the spec itself
    spout = HeronComponentSpec("spout", "sp_clspath", True, 1)
    bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                              inputs={spout: Grouping.SHUFFLE})
    sanitized = bolt._sanitize_inputs()
    expected = {GlobalStreamId("spout", "default"): Grouping.SHUFFLE}
    self.assertEqual(sanitized, expected)

    # dict <HeronComponentSpec -> Grouping>: keyed by a named output stream
    spout = HeronComponentSpec("spout", "sp_clspath", True, 1)
    spout.outputs = [Stream(name='another_stream')]
    bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                              inputs={spout['another_stream']: Grouping.ALL})
    sanitized = bolt._sanitize_inputs()
    expected = {GlobalStreamId("spout", "another_stream"): Grouping.ALL}
    self.assertEqual(sanitized, expected)

    # dict keyed by a HeronComponentSpec whose name attribute is not set
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                              inputs={unnamed: Grouping.ALL})
    self.assertRaises(RuntimeError, bolt._sanitize_inputs)

    # dict <GlobalStreamId -> Grouping>: result equals what was passed in
    stream_groupings = {
        GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
        GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count']),
    }
    bolt = HeronComponentSpec("bolt", "classpath", False, 1, inputs=stream_groupings)
    self.assertEqual(bolt._sanitize_inputs(), stream_groupings)

    # list of HeronComponentSpec: each maps to its default stream with SHUFFLE
    first_spout = HeronComponentSpec("spout1", "sp1_cls", True, 1)
    second_spout = HeronComponentSpec("spout2", "sp2_cls", True, 1)
    bolt = HeronComponentSpec("bolt", "bl_cls", False, 1,
                              inputs=[first_spout, second_spout])
    sanitized = bolt._sanitize_inputs()
    expected = {GlobalStreamId(spout_name, "default"): Grouping.SHUFFLE
                for spout_name in ("spout1", "spout2")}
    self.assertEqual(sanitized, expected)

    # list containing a HeronComponentSpec whose name attribute is not set
    unnamed = HeronComponentSpec(None, "sp_clspath", True, 1)
    bolt = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs=[unnamed])
    self.assertRaises(RuntimeError, bolt._sanitize_inputs)

    # list of GlobalStreamId: each maps to SHUFFLE
    stream_ids = [
        GlobalStreamId("spout1", "default"),
        GlobalStreamId("spout2", "some_stream"),
    ]
    bolt = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
    sanitized = bolt._sanitize_inputs()
    expected = {stream_id: Grouping.SHUFFLE for stream_id in stream_ids}
    self.assertEqual(sanitized, expected)

    # list of neither GlobalStreamId nor HeronComponentSpec
    junk = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    bolt = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=junk)
    self.assertRaises(ValueError, bolt._sanitize_inputs)