Пример #1
0
 def _calculate_inputs(self):
     """Build the ``{GlobalStreamId: Grouping}`` input map for both join sides.

     For each of ``self._left`` and ``self._right`` a ``GlobalStreamId`` is
     created from the object's name and ``_output`` and mapped to a custom
     grouping created from the ``JoinGrouping`` classpath string.
     """
     join_grouping_path = "heronpy.streamlet.impl.joinbolt.JoinGrouping"
     inputs = {}
     # Left is registered before right; each side gets its own grouping object.
     for side in (self._left, self._right):
         stream_id = GlobalStreamId(side.get_name(), side._output)
         inputs[stream_id] = Grouping.custom(join_grouping_path)
     return inputs
Пример #2
0
  def test_custom(self):
    """Grouping.custom() accepts a grouping object and rejects None / True."""
    # a custom grouping object yields a CUSTOM grouping with a bytes payload
    grouping = Grouping.custom(DummyCustomGrouping())
    expected_type = topology_pb2.Grouping.Value("CUSTOM")
    self.assertEqual(grouping.gtype, expected_type)
    self.assertIsInstance(grouping.python_serialized, bytes)

    # unsupported argument types must raise TypeError
    for bad_arg in (None, True):
      with self.assertRaises(TypeError):
        Grouping.custom(bad_arg)
Пример #3
0
    def test_custom(self):
        """Check the result of Grouping.custom() and its TypeError cases."""
        custom_type = topology_pb2.Grouping.Value("CUSTOM")

        # valid: a custom grouping instance
        built = Grouping.custom(DummyCustomGrouping())
        self.assertEqual(built.gtype, custom_type)
        self.assertIsInstance(built.python_serialized, bytes)

        # invalid: None and True are both rejected with TypeError
        self.assertRaises(TypeError, Grouping.custom, None)
        self.assertRaises(TypeError, Grouping.custom, True)
Пример #4
0
    def test_is_grouping_sane(self):
        """ALL/SHUFFLE/LOWEST/NONE are sane as-is; FIELDS/CUSTOM only when built."""
        # constants that are accepted directly
        for ready_made in (Grouping.ALL, Grouping.SHUFFLE,
                           Grouping.LOWEST, Grouping.NONE):
            self.assertTrue(Grouping.is_grouping_sane(ready_made))

        # the bare FIELDS constant is rejected, a grouping from fields() is accepted
        self.assertFalse(Grouping.is_grouping_sane(Grouping.FIELDS))
        self.assertTrue(
            Grouping.is_grouping_sane(Grouping.fields(['hello', 'world'])))

        # the bare CUSTOM constant is rejected, a grouping from custom() is accepted
        self.assertFalse(Grouping.is_grouping_sane(Grouping.CUSTOM))
        self.assertTrue(
            Grouping.is_grouping_sane(Grouping.custom(DummyCustomGrouping())))
Пример #5
0
  def test_is_grouping_sane(self):
    """Verify which grouping values Grouping.is_grouping_sane() accepts."""
    is_sane = Grouping.is_grouping_sane

    # plain constants
    self.assertTrue(is_sane(Grouping.ALL))
    self.assertTrue(is_sane(Grouping.SHUFFLE))
    self.assertTrue(is_sane(Grouping.LOWEST))
    self.assertTrue(is_sane(Grouping.NONE))

    # FIELDS: bare constant is not sane, the result of fields() is
    self.assertFalse(is_sane(Grouping.FIELDS))
    built_fields = Grouping.fields(['hello', 'world'])
    self.assertTrue(is_sane(built_fields))

    # CUSTOM: bare constant is not sane, the result of custom() is
    self.assertFalse(is_sane(Grouping.CUSTOM))
    built_custom = Grouping.custom(DummyCustomGrouping())
    self.assertTrue(is_sane(built_custom))
 def _calculate_inputs(self):
     """Return a one-entry input map for ``self._parent``.

     The key is a ``GlobalStreamId`` built from the parent's name and
     ``_output``; the value is a custom grouping created from the
     ``ReduceGrouping`` classpath string.
     """
     upstream = self._parent
     stream_id = GlobalStreamId(upstream.get_name(), upstream._output)
     reduce_grouping = Grouping.custom(
         "heronpy.streamlet.impl.reducebykeyandwindowbolt.ReduceGrouping")
     return {stream_id: reduce_grouping}
Пример #7
0
class MultiStream(Topology):
  """Topology with one spout feeding a counting bolt and a stream-aggregating bolt.

  ``count_bolt`` takes the spout as input with a fields grouping on ``'word'``;
  ``stream_aggregator`` takes both the spout and its ``'error'`` stream with
  ``Grouping.ALL``.
  """
  # NOTE(review): ``par`` presumably sets component parallelism -- confirm against spec().
  spout = MultiStreamSpout.spec(par=2)
  # NOTE(review): the config key name suggests a tick-tuple interval in seconds -- confirm.
  count_bolt = CountBolt.spec(par=2,
                              inputs={spout: Grouping.fields('word')},
                              config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})
  # Subscribes to the spout itself and to its named 'error' stream.
  stream_aggregator = StreamAggregateBolt.spec(par=1,
                                               inputs={spout: Grouping.ALL,
                                                       spout['error']: Grouping.ALL},
                                               config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 15})
Пример #8
0
def fields_grouping_builder(topology_name, http_server_url):
  """Assemble the fields-grouping test topology and return it.

  Layout: ``ab-spout`` -> ``count-bolt`` (fields grouping on ``'word'``,
  par=2) -> ``sum-bolt`` (``Grouping.NONE``, par=1). The topology is created
  through a ``TestTopologyBuilder`` constructed from the two arguments.
  """
  topology_builder = TestTopologyBuilder(topology_name, http_server_url)

  source = topology_builder.add_spout("ab-spout", ABSpout, 1, max_executions=400)

  counter_inputs = {source: Grouping.fields('word')}
  counter = topology_builder.add_bolt("count-bolt", WordCountBolt,
                                      inputs=counter_inputs, par=2)

  aggregator_inputs = {counter: Grouping.NONE}
  topology_builder.add_bolt("sum-bolt", CountAggregatorBolt,
                            inputs=aggregator_inputs, par=1)

  return topology_builder.create_topology()
Пример #9
0
def fields_grouping_builder(topology_name, http_server_url):
  """Build and return a three-stage topology exercising fields grouping.

  The spout ``ab-spout`` feeds ``count-bolt`` through a fields grouping on
  ``'word'``; ``count-bolt`` feeds ``sum-bolt`` through ``Grouping.NONE``.
  """
  builder = TestTopologyBuilder(topology_name, http_server_url)

  # stage 1: the spout
  spout = builder.add_spout("ab-spout", ABSpout, 1, max_executions=400)

  # stage 2: word counter, two instances
  word_counter = builder.add_bolt(
      "count-bolt",
      WordCountBolt,
      inputs={spout: Grouping.fields('word')},
      par=2,
  )

  # stage 3: single aggregator over the counter's output
  builder.add_bolt(
      "sum-bolt",
      CountAggregatorBolt,
      inputs={word_counter: Grouping.NONE},
      par=1,
  )

  topology = builder.create_topology()
  return topology
Пример #10
0
  def test_fields(self):
    """Grouping.fields() accepts a list of names or one name and validates its input."""
    fields_type = topology_pb2.Grouping.Value("FIELDS")

    # a list of field names is kept as given
    from_list = Grouping.fields(['word', 'count'])
    self.assertEqual(from_list.gtype, fields_type)
    self.assertEqual(from_list.fields, ['word', 'count'])

    # a single string ends up as a one-element list
    from_string = Grouping.fields("just_a_word")
    self.assertEqual(from_string.gtype, fields_type)
    self.assertEqual(from_string.fields, ['just_a_word'])

    # a non-string entry, an int and None all raise TypeError
    for bad_fields in (['word', 'count', True], 123, None):
      with self.assertRaises(TypeError):
        Grouping.fields(bad_fields)

    # calling without any field raises ValueError
    with self.assertRaises(ValueError):
      Grouping.fields()
Пример #11
0
    def _sanitize_inputs(self):
        """Normalize ``self.inputs`` into a ``{GlobalStreamId: Grouping}`` map.

        Returns:
            ``None`` when ``self.inputs`` is ``None``; otherwise a new dict.
            Dict inputs keep the grouping given for each key, list/tuple
            inputs all get ``Grouping.SHUFFLE``. A ``HeronComponentSpec``
            key/item is replaced by a ``GlobalStreamId`` made of its name and
            ``Stream.DEFAULT_STREAM_ID``.

        Raises:
            TypeError: ``self.inputs`` is not a dict, list, tuple or ``None``.
            ValueError: a grouping fails ``Grouping.is_grouping_sane`` or a
                key/item is neither ``HeronComponentSpec`` nor
                ``GlobalStreamId``.
            RuntimeError: a ``HeronComponentSpec`` has no name.
        """
        declared = self.inputs
        if declared is None:
            return None

        # Reject unsupported containers up front.
        if not isinstance(declared, (dict, list, tuple)):
            raise TypeError(
                f"Inputs must be a list, dict, or None, given: {str(declared)}"
            )

        sanitized = {}

        if isinstance(declared, dict):
            # Mapping form: <HeronComponentSpec -> Grouping> or
            # <GlobalStreamId -> Grouping>; the grouping is validated first.
            for source, grouping in list(declared.items()):
                if not Grouping.is_grouping_sane(grouping):
                    raise ValueError('A given grouping is not supported')
                if isinstance(source, HeronComponentSpec):
                    # The name is expected to have been set by the
                    # TopologyType metaclass before this method runs.
                    if source.name is None:
                        raise RuntimeError(
                            "In _sanitize_inputs(): HeronComponentSpec doesn't have a name"
                        )
                    stream_id = GlobalStreamId(source.name,
                                               Stream.DEFAULT_STREAM_ID)
                    sanitized[stream_id] = grouping
                elif isinstance(source, GlobalStreamId):
                    sanitized[source] = grouping
                else:
                    raise ValueError(
                        f"{str(source)} is not supported as a key to inputs")
            return sanitized

        # Sequence form: every entry is subscribed with SHUFFLE grouping.
        for source in declared:
            if isinstance(source, HeronComponentSpec):
                # The name is expected to have been set by the TopologyType
                # metaclass before this method runs.
                if source.name is None:
                    raise RuntimeError(
                        "In _sanitize_inputs(): HeronComponentSpec doesn't have a name"
                    )
                stream_id = GlobalStreamId(source.name,
                                           Stream.DEFAULT_STREAM_ID)
                sanitized[stream_id] = Grouping.SHUFFLE
            elif isinstance(source, GlobalStreamId):
                sanitized[source] = Grouping.SHUFFLE
            else:
                raise ValueError(
                    f"{str(source)} is not supported as an input")

        return sanitized
Пример #12
0
    def test_fields(self):
        """Check accepted and rejected arguments of Grouping.fields()."""
        # (argument, expected ``fields`` attribute) for the accepted forms
        accepted = (
            (['word', 'count'], ['word', 'count']),
            ("just_a_word", ['just_a_word']),
        )
        for given, expected_fields in accepted:
            grouping = Grouping.fields(given)
            self.assertEqual(grouping.gtype,
                             topology_pb2.Grouping.Value("FIELDS"))
            self.assertEqual(grouping.fields, expected_fields)

        # a non-string entry, an int and None are rejected with TypeError
        self.assertRaises(TypeError, Grouping.fields, ['word', 'count', True])
        self.assertRaises(TypeError, Grouping.fields, 123)
        self.assertRaises(TypeError, Grouping.fields, None)

        # no fields given at all
        self.assertRaises(ValueError, Grouping.fields)
Пример #13
0
  def _sanitize_inputs(self):
    """Turn ``self.inputs`` into a map <GlobalStreamId -> Grouping>.

    Returns None when ``self.inputs`` is None. A dict keeps the grouping given
    for each key; a list or tuple subscribes every entry with
    ``Grouping.SHUFFLE``. HeronComponentSpec entries are replaced by a
    GlobalStreamId made of the spec's name and ``Stream.DEFAULT_STREAM_ID``.

    Raises TypeError for any other container, ValueError for a grouping that
    fails ``Grouping.is_grouping_sane`` or an unsupported key/item, and
    RuntimeError for a HeronComponentSpec without a name.
    """
    if self.inputs is None:
      return

    # The name is expected to be set by the TopologyType metaclass beforehand,
    # so hitting this message indicates a programming error.
    missing_name_msg = "In _sanitize_inputs(): HeronComponentSpec doesn't have a name"
    sanitized = {}

    if isinstance(self.inputs, dict):
      # <HeronComponentSpec -> Grouping> or <GlobalStreamId -> Grouping>
      for source, grouping in self.inputs.items():
        if not Grouping.is_grouping_sane(grouping):
          raise ValueError('A given grouping is not supported')
        if isinstance(source, HeronComponentSpec):
          if source.name is None:
            raise RuntimeError(missing_name_msg)
          # a bare spec refers to its default stream
          source = GlobalStreamId(source.name, Stream.DEFAULT_STREAM_ID)
        elif not isinstance(source, GlobalStreamId):
          raise ValueError("%s is not supported as a key to inputs" % str(source))
        sanitized[source] = grouping
    elif isinstance(self.inputs, (list, tuple)):
      # list of HeronComponentSpec or GlobalStreamId, always SHUFFLE grouping
      for source in self.inputs:
        if isinstance(source, HeronComponentSpec):
          if source.name is None:
            raise RuntimeError(missing_name_msg)
          source = GlobalStreamId(source.name, Stream.DEFAULT_STREAM_ID)
        elif not isinstance(source, GlobalStreamId):
          raise ValueError("%s is not supported as an input" % str(source))
        sanitized[source] = Grouping.SHUFFLE
    else:
      raise TypeError("Inputs must be a list, dict, or None, given: %s" % str(self.inputs))

    return sanitized
Пример #14
0
 def _calculate_inputs(self):
     """Return a one-entry input map for ``self._parent``.

     The parent's stream (name plus ``_output``) is mapped to a custom
     grouping wrapping a ``RepartitionCustomGrouping`` built from
     ``self._repartition_function``.
     """
     parent = self._parent
     source_stream = GlobalStreamId(parent.get_name(), parent._output)
     partitioner = RepartitionCustomGrouping(self._repartition_function)
     return {source_stream: Grouping.custom(partitioner)}
Пример #15
0
import sys

import heronpy.api.api_constants as constants
from heronpy.api.topology import TopologyBuilder
from heronpy.api.stream import Grouping
from heronpy.api.bolt.window_bolt import SlidingWindowBolt
from heron.examples.src.python.spout import WordSpout
from examples.src.python.bolt import WindowSizeBolt

# Topology is defined using a topology builder
# Refer to multi_stream_topology for defining a topology by subclassing Topology
# pylint: disable=superfluous-parens
if __name__ == '__main__':
  # Exactly one command-line argument, the topology name, is required.
  if len(sys.argv) != 2:
    print("Topology's name is not specified")
    sys.exit(1)

  topology_name = sys.argv[1]
  builder = TopologyBuilder(name=topology_name)

  # Spout with two instances.
  word_spout = builder.add_spout("word_spout", WordSpout, par=2)

  # Window settings handed to the bolt: duration 10, slide interval 2
  # (seconds, going by the constant names).
  window_config = {SlidingWindowBolt.WINDOW_DURATION_SECS: 10,
                   SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS: 2}
  count_bolt = builder.add_bolt("count_bolt", WindowSizeBolt, par=2,
                                inputs={word_spout: Grouping.fields('word')},
                                config=window_config)

  # Run the topology in ATLEAST_ONCE reliability mode.
  reliability_mode = constants.TopologyReliabilityMode.ATLEAST_ONCE
  builder.set_config({constants.TOPOLOGY_RELIABILITY_MODE: reliability_mode})

  builder.build_and_submit()
Пример #16
0
 def _calculate_inputs(self):
   """Return the input map with one entry for each of ``_left`` and ``_right``.

   Each key is a GlobalStreamId built from the side's name and ``_output``;
   each value is a separate custom grouping created from the JoinGrouping
   classpath string.
   """
   grouping_path = "heronpy.streamlet.impl.joinbolt.JoinGrouping"

   left_stream = GlobalStreamId(self._left.get_name(), self._left._output)
   left_grouping = Grouping.custom(grouping_path)

   right_stream = GlobalStreamId(self._right.get_name(), self._right._output)
   right_grouping = Grouping.custom(grouping_path)

   return {left_stream: left_grouping, right_stream: right_grouping}
Пример #17
0
 def _calculate_inputs(self):
     """Map every parent's stream to a custom grouping.

     One entry is produced per element of ``self._parents``: the key is a
     ``GlobalStreamId`` built from the parent's ``_stage_name`` and
     ``_output``; the value is a custom grouping created from the
     ``JoinGrouping`` classpath string.
     """
     return {
         GlobalStreamId(parent._stage_name, parent._output):
             Grouping.custom("heronpy.dsl.joinbolt.JoinGrouping")
         for parent in self._parents
     }
Пример #18
0
 def _calculate_inputs(self):
   """Return the input map holding the single entry for ``self._parent``.

   The parent's GlobalStreamId (name plus ``_output``) is mapped to a custom
   grouping created from the ReduceGrouping classpath string.
   """
   inputs = {}
   parent_stream = GlobalStreamId(self._parent.get_name(), self._parent._output)
   inputs[parent_stream] = Grouping.custom(
       "heronpy.streamlet.impl.reducebykeyandwindowbolt.ReduceGrouping")
   return inputs
Пример #19
0
  def test_sanitize_inputs(self):
    """Check _sanitize_inputs() against each accepted and rejected shape of ``inputs``."""
    # _sanitize_inputs() is only meant to be called once the spec's name attribute is set

    # anything other than dict, list, tuple or None is rejected with TypeError
    for bad_inputs in ("string", 100):
      bad_spec = HeronComponentSpec("name", "classpath", True, 1, inputs=bad_inputs)
      with self.assertRaises(TypeError):
        bad_spec._sanitize_inputs()

    # dict keyed by a HeronComponentSpec: the key becomes the spec's "default" stream
    spout_spec = HeronComponentSpec("spout", "sp_clspath", True, 1)
    bolt_spec = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                   inputs={spout_spec: Grouping.SHUFFLE})
    expected = {GlobalStreamId("spout", "default"): Grouping.SHUFFLE}
    self.assertEqual(bolt_spec._sanitize_inputs(), expected)

    # dict keyed by a named output stream of a spec
    spout_spec = HeronComponentSpec("spout", "sp_clspath", True, 1)
    spout_spec.outputs = [Stream(name='another_stream')]
    bolt_spec = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                   inputs={spout_spec['another_stream']: Grouping.ALL})
    expected = {GlobalStreamId("spout", "another_stream"): Grouping.ALL}
    self.assertEqual(bolt_spec._sanitize_inputs(), expected)

    # dict keyed by a spec whose name attribute is not set
    unnamed_spec = HeronComponentSpec(None, "sp_clspath", True, 1)
    bolt_spec = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                                   inputs={unnamed_spec: Grouping.ALL})
    with self.assertRaises(RuntimeError):
      bolt_spec._sanitize_inputs()

    # dict <GlobalStreamId -> Grouping> comes back equal to what was given
    stream_groupings = {
        GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
        GlobalStreamId("another_spout", "default"): Grouping.fields(['word', 'count']),
    }
    spec = HeronComponentSpec("bolt", "classpath", False, 1, inputs=stream_groupings)
    self.assertEqual(spec._sanitize_inputs(), stream_groupings)

    # list of HeronComponentSpec: SHUFFLE on each spec's "default" stream
    first_spout = HeronComponentSpec("spout1", "sp1_cls", True, 1)
    second_spout = HeronComponentSpec("spout2", "sp2_cls", True, 1)
    bolt_spec = HeronComponentSpec("bolt", "bl_cls", False, 1,
                                   inputs=[first_spout, second_spout])
    expected = {GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
                GlobalStreamId("spout2", "default"): Grouping.SHUFFLE}
    self.assertEqual(bolt_spec._sanitize_inputs(), expected)

    # list containing a spec whose name attribute is not set
    unnamed_spec = HeronComponentSpec(None, "sp_clspath", True, 1)
    bolt_spec = HeronComponentSpec("bolt", "bl_clspath", False, 1, inputs=[unnamed_spec])
    with self.assertRaises(RuntimeError):
      bolt_spec._sanitize_inputs()

    # list of GlobalStreamId: each one mapped to SHUFFLE
    stream_ids = [GlobalStreamId("spout1", "default"),
                  GlobalStreamId("spout2", "some_stream")]
    spec = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
    self.assertEqual(spec._sanitize_inputs(), dict.fromkeys(stream_ids, Grouping.SHUFFLE))

    # list of neither GlobalStreamId nor HeronComponentSpec
    unsupported = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    spec = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=unsupported)
    with self.assertRaises(ValueError):
      spec._sanitize_inputs()
Пример #20
0
class CustomGrouping(Topology):
  """Topology with one spout feeding a bolt through a custom grouping.

  ``consume_bolt`` takes ``word_spout`` as input, grouped by
  ``Grouping.custom(SampleCustomGrouping())``.
  """
  word_spout = WordSpout.spec(par=1)
  # NOTE(review): the config key name suggests a tick-tuple interval in seconds -- confirm.
  consume_bolt = ConsumeBolt.spec(par=3,
                                  inputs={word_spout: Grouping.custom(SampleCustomGrouping())},
                                  config={constants.TOPOLOGY_TICK_TUPLE_FREQ_SECS: 10})
Пример #21
0
 def _calculate_inputs(self):
   """Return the input map holding the single entry for ``self._parent``.

   The key is the parent's GlobalStreamId (name plus ``_output``); the value is
   a custom grouping wrapping a RepartitionCustomGrouping built from
   ``self._repartition_function``.
   """
   stream_key = GlobalStreamId(self._parent.get_name(), self._parent._output)
   repartitioner = RepartitionCustomGrouping(self._repartition_function)
   custom_grouping = Grouping.custom(repartitioner)
   return {stream_key: custom_grouping}
Пример #22
0
'''Example WindowSizeTopology'''
import sys

import heronpy.api.api_constants as constants
from heronpy.api.topology import TopologyBuilder
from heronpy.api.stream import Grouping
from heronpy.api.bolt.window_bolt import SlidingWindowBolt
from heron.examples.src.python.spout import WordSpout
from examples.src.python.bolt import WindowSizeBolt

# Topology is defined using a topology builder
# Refer to multi_stream_topology for defining a topology by subclassing Topology
if __name__ == '__main__':
  # The topology name is the single required command-line argument.
  if len(sys.argv) != 2:
    # Call form of print: the bare ``print "..."`` statement is a SyntaxError
    # on Python 3, while this form prints the same text on Python 2 and 3.
    print("Topology's name is not specified")
    sys.exit(1)

  builder = TopologyBuilder(name=sys.argv[1])

  # Spout with two instances feeding the windowed bolt, grouped on 'word'.
  word_spout = builder.add_spout("word_spout", WordSpout, par=2)
  # Window settings: duration 10, slide interval 2 (seconds, going by the
  # constant names).
  count_bolt = builder.add_bolt("count_bolt", WindowSizeBolt, par=2,
                                inputs={word_spout: Grouping.fields('word')},
                                config={SlidingWindowBolt.WINDOW_DURATION_SECS: 10,
                                        SlidingWindowBolt.WINDOW_SLIDEINTERVAL_SECS: 2})

  # Run the topology in ATLEAST_ONCE reliability mode.
  topology_config = {constants.TOPOLOGY_RELIABILITY_MODE:
                         constants.TopologyReliabilityMode.ATLEAST_ONCE}
  builder.set_config(topology_config)

  builder.build_and_submit()
Пример #23
0
    def test_sanitize_inputs(self):
        """Exercise HeronComponentSpec._sanitize_inputs() for every input shape."""
        # _sanitize_inputs() should only run after the spec's name attribute is set

        # --- unsupported containers (only dict, list, tuple or None are valid) ---
        string_spec = HeronComponentSpec(
            "name", "classpath", True, 1, inputs="string")
        self.assertRaises(TypeError, string_spec._sanitize_inputs)

        int_spec = HeronComponentSpec(
            "name", "classpath", True, 1, inputs=100)
        self.assertRaises(TypeError, int_spec._sanitize_inputs)

        # --- dict <HeronComponentSpec -> Grouping> ---
        source = HeronComponentSpec("spout", "sp_clspath", True, 1)
        target = HeronComponentSpec(
            "bolt", "bl_clspath", False, 1,
            inputs={source: Grouping.SHUFFLE})
        sanitized = target._sanitize_inputs()
        default_stream = GlobalStreamId("spout", "default")
        self.assertEqual(sanitized, {default_stream: Grouping.SHUFFLE})

        # a named output stream of the spec used as the key
        source = HeronComponentSpec("spout", "sp_clspath", True, 1)
        source.outputs = [Stream(name='another_stream')]
        target = HeronComponentSpec(
            "bolt", "bl_clspath", False, 1,
            inputs={source['another_stream']: Grouping.ALL})
        sanitized = target._sanitize_inputs()
        named_stream = GlobalStreamId("spout", "another_stream")
        self.assertEqual(sanitized, {named_stream: Grouping.ALL})

        # a spec without a name as the key
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        target = HeronComponentSpec(
            "bolt", "bl_clspath", False, 1,
            inputs={nameless: Grouping.ALL})
        self.assertRaises(RuntimeError, target._sanitize_inputs)

        # --- dict <GlobalStreamId -> Grouping> is returned equal to the input ---
        by_stream = {}
        by_stream[GlobalStreamId("some_spout", "some_stream")] = Grouping.NONE
        by_stream[GlobalStreamId("another_spout", "default")] = \
            Grouping.fields(['word', 'count'])
        target = HeronComponentSpec(
            "bolt", "classpath", False, 1, inputs=by_stream)
        sanitized = target._sanitize_inputs()
        self.assertEqual(sanitized, by_stream)

        # --- list of HeronComponentSpec: SHUFFLE on each default stream ---
        source_a = HeronComponentSpec("spout1", "sp1_cls", True, 1)
        source_b = HeronComponentSpec("spout2", "sp2_cls", True, 1)
        target = HeronComponentSpec(
            "bolt", "bl_cls", False, 1, inputs=[source_a, source_b])
        sanitized = target._sanitize_inputs()
        shuffled_defaults = {
            GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
            GlobalStreamId("spout2", "default"): Grouping.SHUFFLE,
        }
        self.assertEqual(sanitized, shuffled_defaults)

        # a spec without a name inside the list
        nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
        target = HeronComponentSpec(
            "bolt", "bl_clspath", False, 1, inputs=[nameless])
        self.assertRaises(RuntimeError, target._sanitize_inputs)

        # --- list of GlobalStreamId: each one mapped to SHUFFLE ---
        streams = [
            GlobalStreamId("spout1", "default"),
            GlobalStreamId("spout2", "some_stream"),
        ]
        target = HeronComponentSpec(
            "bolt", "bl_cls", False, 1, inputs=streams)
        sanitized = target._sanitize_inputs()
        self.assertEqual(
            sanitized, {stream: Grouping.SHUFFLE for stream in streams})

        # --- list of neither GlobalStreamId nor HeronComponentSpec ---
        junk = [None, 123, "string", [GlobalStreamId("sp", "default")]]
        target = HeronComponentSpec(
            "bolt", "bl_cls", False, 1, inputs=junk)
        self.assertRaises(ValueError, target._sanitize_inputs)
Пример #24
0
 def _calculate_inputs(self):
     """Return a one-entry input map for the first element of ``self._parents``.

     The key is a ``GlobalStreamId`` built from that parent's ``_stage_name``
     and ``_output``; the value is a custom grouping created from the
     ``ReduceGrouping`` classpath string.
     """
     first_parent = self._parents[0]
     stream_id = GlobalStreamId(first_parent._stage_name,
                                first_parent._output)
     grouping = Grouping.custom(
         "heronpy.dsl.reducebykeyandwindowbolt.ReduceGrouping")
     return {stream_id: grouping}