Пример #1
0
class TestSane(Topology):
    # Fixture topology with one spout and one bolt. Only ``#`` comments are
    # used here so that no extra attribute is added to the class body.

    # Topology-wide settings; "spout.overriden.config" is also set on the
    # spout below, with a different value.
    config = {
        "topology.wide.config.1": "value",
        "spout.overriden.config": True,
    }

    # Spout declaring two plain output fields plus one extra named stream,
    # and component-specific config values of several types.
    spout = HeronComponentSpec(
        name=None,
        python_class_path="sp_class",
        is_spout=True,
        par=3,
        inputs=None,
        outputs=[
            "word",
            "count",
            Stream(fields=["error_msg"], name="error_stream"),
        ],
        config={
            "spout.specific.config.1": "value",
            "spout.specific.config.2": True,
            "spout.specific.config.3": -12.4,
            "spout.specific.config.4": [1, 2, 3],
            "spout.overriden.config": False,
        },
    )

    # Bolt subscribed to the spout itself (SHUFFLE) and to the spout's
    # "error_stream" (ALL).
    bolt = HeronComponentSpec(
        name=None,
        python_class_path="bl_class",
        is_spout=False,
        par=4,
        inputs={
            spout: Grouping.SHUFFLE,
            spout["error_stream"]: Grouping.ALL,
        },
    )
Пример #2
0
 def test_class_dict_to_specs(self):
     # Two attributes whose specs share one component name must be rejected.
     spout_spec = HeronComponentSpec(name="same_name",
                                     python_class_path="sp_cls",
                                     is_spout=True,
                                     par=1)
     bolt_spec = HeronComponentSpec(name="same_name",
                                    python_class_path="bl_cls",
                                    is_spout=False,
                                    par=2)
     clashing = {"spout": spout_spec, "bolt": bolt_spec}
     self.assertRaises(ValueError, TopologyType.class_dict_to_specs,
                       clashing)
Пример #3
0
    def test_get_item(self):
        def sample_outputs():
            # Fresh list per spec: two plain fields and two named streams.
            return [
                "string",
                "hello",
                Stream(fields=["abc", "def"], name="another_stream"),
                Stream(fields=["another", "default"], name="default"),
            ]

        # Spec with a name: the stream id is keyed by that name.
        named = HeronComponentSpec(name="spout",
                                   python_class_path="class",
                                   is_spout=True,
                                   par=1)
        named.outputs = sample_outputs()
        found = named['another_stream']
        self.assertEqual(found, GlobalStreamId("spout", "another_stream"))

        # Spec without a name: the stream id refers to the spec object itself.
        unnamed = HeronComponentSpec(name=None,
                                     python_class_path="class",
                                     is_spout=True,
                                     par=1)
        unnamed.outputs = sample_outputs()
        found = unnamed['default']
        self.assertEqual(found, GlobalStreamId(unnamed, "default"))

        # Asking for a stream that was never declared raises ValueError.
        unregistered = HeronComponentSpec(name=None,
                                          python_class_path="class",
                                          is_spout=True,
                                          par=1)
        unregistered.outputs = sample_outputs()
        with self.assertRaises(ValueError):
            unregistered['non_existent_stream']
Пример #4
0
    def test_component_id_property(self):
        # A plain string component id is returned as-is.
        from_string = GlobalStreamId(componentId="component",
                                     streamId="stream")
        self.assertEqual(from_string.component_id, "component")

        # A named HeronComponentSpec resolves to its name.
        named_spec = HeronComponentSpec(name="spout",
                                        python_class_path="class",
                                        is_spout=True,
                                        par=1)
        from_named = GlobalStreamId(named_spec, "stream")
        self.assertEqual(from_named.component_id, "spout")

        # An unnamed HeronComponentSpec yields a placeholder of the form
        # "<No name available for HeronComponentSpec yet, uuid: %s>", so only
        # the presence of the spec's uuid is checked.
        unnamed_spec = HeronComponentSpec(name=None,
                                          python_class_path="class",
                                          is_spout=True,
                                          par=1)
        from_unnamed = GlobalStreamId(unnamed_spec, "stream")
        self.assertIn(unnamed_spec.uuid, from_unnamed.component_id)
Пример #5
0
    def spec(cls,
             name=None,
             inputs=None,
             par=1,
             config=None,
             optional_outputs=None):
        """Build the ``HeronComponentSpec`` that describes this bolt.

        Output fields are normally declared as an ``outputs`` attribute of the
        ``Bolt`` subclass; ``optional_outputs`` exists only to support dynamic
        output field declaration. Not every way of declaring inputs is
        supported by this implementation; see ``inputs`` below.

        :type name: str
        :param name: Name of this bolt.
        :type inputs: dict or list
        :param inputs: Streams that feed into this Bolt. It is passed to
                       ``HeronComponentSpec`` unchanged.

                       Four forms are acceptable:

                       1. A `dict` mapping from ``HeronComponentSpec`` to ``Grouping``.
                          In this case, default stream is used.
                       2. A `dict` mapping from ``GlobalStreamId`` to ``Grouping``.
                          This ``GlobalStreamId`` object itself is different from StreamParse,
                          because Heron does not use thrift, although its constructor method
                          is compatible.
                       3. A `list` of ``HeronComponentSpec``. In this case, default stream with
                          SHUFFLE grouping is used.
                       4. A `list` of ``GlobalStreamId``. In this case, SHUFFLE grouping is used.
        :type par: int
        :param par: Parallelism hint for this bolt.
        :type config: dict
        :param config: Component-specific config settings.
        :type optional_outputs: list of (str or Stream) or tuple of (str or Stream)
        :param optional_outputs: Additional output fields for this bolt. These fields are
                                 appended to a copy of the ``outputs`` class attribute of your
                                 bolt; the class attribute itself is not modified.
        :return: a ``HeronComponentSpec`` created with ``is_spout=False``.
        :raises AssertionError: if ``optional_outputs`` is neither a list nor a tuple, or if one
                                of its elements is neither ``str`` nor ``Stream``.
        """
        python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

        if hasattr(cls, 'outputs'):
            # avoid modification to cls.outputs
            _outputs = copy.copy(cls.outputs)
        else:
            _outputs = []

        if optional_outputs is not None:
            assert isinstance(optional_outputs, (list, tuple))
            # A shallow copy of a tuple is still a tuple, which has no
            # ``append``; switch to a list so tuple-declared ``outputs`` can
            # be extended as well.
            if isinstance(_outputs, tuple):
                _outputs = list(_outputs)
            for out in optional_outputs:
                assert isinstance(out, (str, Stream))
                _outputs.append(out)

        return HeronComponentSpec(name,
                                  python_class_path,
                                  is_spout=False,
                                  par=par,
                                  inputs=inputs,
                                  outputs=_outputs,
                                  config=config)
Пример #6
0
    def test_add_spec(self):
        topology_builder = TopologyBuilder("Test")

        # A spec without a name is rejected.
        with self.assertRaises(ValueError):
            topology_builder.add_spec(
                HeronComponentSpec(name=None,
                                   python_class_path="path",
                                   is_spout=True,
                                   par=1))

        # Something that is not a spec at all is rejected.
        self.assertRaises(TypeError, topology_builder.add_spec, None)

        # Neither failed call may have registered anything.
        self.assertEqual(len(topology_builder._specs), 0)

        # Ten distinctly named specs added in a single call are all kept.
        named_specs = [
            HeronComponentSpec(str(index), "path", True, 1)
            for index in range(10)
        ]
        topology_builder.add_spec(*named_specs)
        self.assertEqual(len(topology_builder._specs), 10)
Пример #7
0
    def spec(cls, name=None, par=1, config=None, optional_outputs=None):
        """Build the ``HeronComponentSpec`` that describes this spout.

        The usage of this method is compatible with StreamParse API, although it
        returns a ``HeronComponentSpec`` rather than a StreamParse spec object.

        Output fields are normally declared as an ``outputs`` attribute of the
        ``Spout`` subclass; ``optional_outputs`` exists only to support dynamic
        output field declaration. A spout takes no inputs, so the spec is always
        created with ``inputs=None``.

        :type name: str
        :param name: Name of this spout.
        :type par: int
        :param par: Parallelism hint for this spout.
        :type config: dict
        :param config: Component-specific config settings.
        :type optional_outputs: list of (str or Stream) or tuple of (str or Stream)
        :param optional_outputs: Additional output fields for this spout. These fields are
                                 appended to a copy of the ``outputs`` class attribute of your
                                 spout; the class attribute itself is not modified.
        :return: a ``HeronComponentSpec`` created with ``is_spout=True``.
        :raises AssertionError: if ``optional_outputs`` is neither a list nor a tuple, or if one
                                of its elements is neither ``str`` nor ``Stream``.
        """
        python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

        if hasattr(cls, 'outputs'):
            # avoid modification to cls.outputs
            _outputs = copy.copy(cls.outputs)
        else:
            _outputs = []

        if optional_outputs is not None:
            assert isinstance(optional_outputs, (list, tuple))
            # A shallow copy of a tuple is still a tuple, which has no
            # ``append``; switch to a list so tuple-declared ``outputs`` can
            # be extended as well.
            if isinstance(_outputs, tuple):
                _outputs = list(_outputs)
            for out in optional_outputs:
                assert isinstance(out, (str, Stream))
                _outputs.append(out)

        return HeronComponentSpec(name,
                                  python_class_path,
                                  is_spout=True,
                                  par=par,
                                  inputs=None,
                                  outputs=_outputs,
                                  config=config)
Пример #8
0
    def spec(cls,
             name,
             par,
             config,
             user_spout_classpath,
             user_output_fields=None):
        """Build the ``HeronComponentSpec`` for this spout class.

        The class path of the user's spout is recorded in the component config
        under ``integ_const.USER_SPOUT_CLASSPATH``.

        :param name: Name of the component.
        :param par: Parallelism hint.
        :param config: Component-specific config settings, or ``None``. The
                       given object is not modified; a shallow copy carrying
                       the extra classpath entry is passed to the spec.
        :param user_spout_classpath: Value stored under
                                     ``integ_const.USER_SPOUT_CLASSPATH``.
        :param user_output_fields: Optional iterable of output fields added
                                   after a copy of ``cls.outputs``.
        :return: a ``HeronComponentSpec`` created with ``is_spout=True`` and
                 ``inputs=None``.
        """
        python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

        # Work on a copy so the caller's config object is left untouched, and
        # accept None as "no extra config".
        _config = {} if config is None else copy.copy(config)
        _config[integ_const.USER_SPOUT_CLASSPATH] = user_spout_classpath

        # avoid modification to cls.outputs
        _outputs = copy.copy(cls.outputs)
        if user_output_fields is not None:
            # A copied tuple has no ``extend``; switch to a list first.
            if isinstance(_outputs, tuple):
                _outputs = list(_outputs)
            _outputs.extend(user_output_fields)

        return HeronComponentSpec(name,
                                  python_class_path,
                                  is_spout=True,
                                  par=par,
                                  inputs=None,
                                  outputs=_outputs,
                                  config=_config)
Пример #9
0
 def test_add_bolt_specs(self):
     # A bolt spec created without any inputs is expected to be rejected
     # (presumably because of the missing inputs).
     inputless_bolt = HeronComponentSpec(name="bolt",
                                         python_class_path="bl_cls",
                                         is_spout=False,
                                         par=1)
     self.assertRaises(ValueError, TopologyType.add_bolt_specs,
                       inputless_bolt, {})
Пример #10
0
 def test_add_spout_specs(self):
     # A spout that declares no output must be rejected.
     outputless_spout = HeronComponentSpec(name="spout",
                                           python_class_path="sp_cls",
                                           is_spout=True,
                                           par=1)
     self.assertRaises(ValueError, TopologyType.add_spout_specs,
                       outputless_spout, {})
Пример #11
0
 class JustBolt(Topology):
     # Topology fixture that declares a single bolt and no spout.
     bolt = HeronComponentSpec(name=None,
                               python_class_path="bl_class",
                               is_spout=False,
                               par=4)
Пример #12
0
    def test_sanitize_config(self):
        # empty dict
        ret = HeronComponentSpec._sanitize_config({})
        self.assertEqual(ret, {})

        # non-dict given
        with self.assertRaises(TypeError):
            HeronComponentSpec._sanitize_config("{key: value}")
        with self.assertRaises(TypeError):
            HeronComponentSpec._sanitize_config(True)
        with self.assertRaises(TypeError):
            HeronComponentSpec._sanitize_config(None)

        # non-string key
        # The keys used here must be hashable: an unhashable key such as a list
        # makes the dict literal itself raise TypeError, so _sanitize_config()
        # would never be called and the assertion would pass vacuously.
        with self.assertRaises(TypeError):
            HeronComponentSpec._sanitize_config({('k', 'e', 'y'): "value"})
        with self.assertRaises(TypeError):
            HeronComponentSpec._sanitize_config({None: "value"})

        # convert boolean value
        ret = HeronComponentSpec._sanitize_config({"key": True})
        self.assertEqual(ret["key"], "true")
        ret = HeronComponentSpec._sanitize_config({"key": False})
        self.assertEqual(ret["key"], "false")

        # convert int and float
        ret = HeronComponentSpec._sanitize_config({"key": 10})
        self.assertEqual(ret["key"], "10")
        ret = HeronComponentSpec._sanitize_config({"key": -2400000})
        self.assertEqual(ret["key"], "-2400000")
        ret = HeronComponentSpec._sanitize_config({"key": 0.0000001})
        self.assertEqual(ret["key"], "1e-07")
        ret = HeronComponentSpec._sanitize_config({"key": -15.33333})
        self.assertEqual(ret["key"], "-15.33333")

        # non-string value -> should expect the same object
        ret = HeronComponentSpec._sanitize_config(
            {"key": ['v', 'a', 'l', 'u', 'e']})
        self.assertEqual(ret["key"], ['v', 'a', 'l', 'u', 'e'])
        ret = HeronComponentSpec._sanitize_config({"key": None})
        self.assertEqual(ret["key"], None)
Пример #13
0
    def test_get_out_streamids(self):
        def new_spout():
            # Every scenario starts from an identical, freshly built spec.
            return HeronComponentSpec(name="spout",
                                      python_class_path="class",
                                      is_spout=True,
                                      par=1)

        # No outputs declared: an empty set of stream ids.
        without_outputs = new_spout()
        stream_ids = without_outputs.get_out_streamids()
        self.assertEqual(stream_ids, set())

        # Outputs that are neither a list nor a tuple are a type error.
        malformed = new_spout()
        malformed.outputs = "string"
        self.assertRaises(TypeError, malformed.get_out_streamids)

        # Plain fields plus two named streams yield exactly these ids.
        well_formed = new_spout()
        well_formed.outputs = [
            "string",
            "hello",
            Stream(fields=["abc", "def"], name="another_stream"),
            Stream(fields=["another", "default"], name="default"),
        ]
        stream_ids = well_formed.get_out_streamids()
        self.assertEqual(stream_ids, {"default", "another_stream"})
Пример #14
0
    def test_sanitize_args(self):
        # Well-formed arguments, with and without a name, build a spec.
        named = HeronComponentSpec(name="string",
                                   python_class_path="string.path",
                                   is_spout=True,
                                   par=1)
        self.assertIsNotNone(named)
        unnamed = HeronComponentSpec(name=None,
                                     python_class_path="string.path",
                                     is_spout=True,
                                     par=1)
        self.assertIsNotNone(unnamed)

        # Each tuple is (name, python_class_path, is_spout, par) with exactly
        # one bad value; every one must trip an assertion in the constructor.
        rejected_arguments = (
            # bad name
            (123, "classpath", True, 1),
            (False, "classpath", True, 1),
            # bad classpath
            ("name", {}, True, 1),
            ("name", None, True, 1),
            # bad is_spout
            ("name", "classpath", 1, 1),
            ("name", "classpath", None, 1),
            # bad par
            ("name", "classpath", True, "1"),
            ("name", "classpath", True, 1.35),
            ("name", "classpath", True, -21),
            ("name", "classpath", True, None),
        )
        for arguments in rejected_arguments:
            with self.assertRaises(AssertionError):
                HeronComponentSpec(*arguments)
Пример #15
0
    def test_sanitize_outputs(self):
        def new_spout():
            # Every scenario starts from an identical, freshly built spec.
            return HeronComponentSpec(name="spout",
                                      python_class_path="class",
                                      is_spout=True,
                                      par=1)

        # No outputs argument given: nothing to sanitize, None comes back.
        untouched = new_spout()
        self.assertIsNone(untouched._sanitize_outputs())

        # Outputs that are neither a list nor a tuple are a type error.
        not_a_sequence = new_spout()
        not_a_sequence.outputs = "string"
        self.assertRaises(TypeError, not_a_sequence._sanitize_outputs)

        # Elements that are neither str nor Stream are a type error.
        bad_elements = new_spout()
        bad_elements.outputs = ["string", False, 123]
        self.assertRaises(TypeError, bad_elements._sanitize_outputs)

        # Plain strings all land in the "default" stream.
        strings_only = new_spout()
        strings_only.outputs = ["string", "hello", "heron"]
        sanitized = strings_only._sanitize_outputs()
        self.assertEqual(sanitized,
                         {"default": ["string", "hello", "heron"]})

        # Plain strings and a Stream named "default" are merged, while other
        # named streams keep their own field list.
        mixed = new_spout()
        mixed.outputs = [
            "string",
            "hello",
            Stream(fields=["abc", "def"], name="another_stream"),
            Stream(fields=["another", "default"], name="default"),
        ]
        sanitized = mixed._sanitize_outputs()
        self.assertEqual(
            sanitized,
            {
                "default": ["string", "hello", "another", "default"],
                "another_stream": ["abc", "def"],
            })
Пример #16
0
    def test_sanitize_inputs(self):
        # _sanitize_inputs() is only meant to be called once the spec's name
        # attribute has been set.

        # Inputs must be a dict, list, tuple or None; anything else is a
        # type error.
        string_inputs = HeronComponentSpec(name="name",
                                           python_class_path="classpath",
                                           is_spout=True,
                                           par=1,
                                           inputs="string")
        self.assertRaises(TypeError, string_inputs._sanitize_inputs)

        int_inputs = HeronComponentSpec(name="name",
                                        python_class_path="classpath",
                                        is_spout=True,
                                        par=1,
                                        inputs=100)
        self.assertRaises(TypeError, int_inputs._sanitize_inputs)

        # dict <HeronComponentSpec -> Grouping>: keyed by the source's
        # default stream.
        source = HeronComponentSpec(name="spout",
                                    python_class_path="sp_clspath",
                                    is_spout=True,
                                    par=1)
        sink = HeronComponentSpec(name="bolt",
                                  python_class_path="bl_clspath",
                                  is_spout=False,
                                  par=1,
                                  inputs={source: Grouping.SHUFFLE})
        sanitized = sink._sanitize_inputs()
        self.assertEqual(
            sanitized,
            {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

        # dict keyed by a stream obtained through spec[...] indexing.
        source = HeronComponentSpec(name="spout",
                                    python_class_path="sp_clspath",
                                    is_spout=True,
                                    par=1)
        source.outputs = [Stream(name='another_stream')]
        sink = HeronComponentSpec(
            name="bolt",
            python_class_path="bl_clspath",
            is_spout=False,
            par=1,
            inputs={source['another_stream']: Grouping.ALL})
        sanitized = sink._sanitize_inputs()
        self.assertEqual(
            sanitized,
            {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

        # dict keyed by a spec whose name attribute is not set.
        nameless_source = HeronComponentSpec(name=None,
                                             python_class_path="sp_clspath",
                                             is_spout=True,
                                             par=1)
        sink = HeronComponentSpec(name="bolt",
                                  python_class_path="bl_clspath",
                                  is_spout=False,
                                  par=1,
                                  inputs={nameless_source: Grouping.ALL})
        self.assertRaises(RuntimeError, sink._sanitize_inputs)

        # dict <GlobalStreamId -> Grouping>: returned as an equal mapping.
        grouping_by_stream = {
            GlobalStreamId("some_spout", "some_stream"):
            Grouping.NONE,
            GlobalStreamId("another_spout", "default"):
            Grouping.fields(['word', 'count']),
        }
        sink = HeronComponentSpec(name="bolt",
                                  python_class_path="classpath",
                                  is_spout=False,
                                  par=1,
                                  inputs=grouping_by_stream)
        sanitized = sink._sanitize_inputs()
        self.assertEqual(sanitized, grouping_by_stream)

        # list of HeronComponentSpec: default stream, SHUFFLE grouping.
        first_source = HeronComponentSpec(name="spout1",
                                          python_class_path="sp1_cls",
                                          is_spout=True,
                                          par=1)
        second_source = HeronComponentSpec(name="spout2",
                                           python_class_path="sp2_cls",
                                           is_spout=True,
                                           par=1)
        sink = HeronComponentSpec(name="bolt",
                                  python_class_path="bl_cls",
                                  is_spout=False,
                                  par=1,
                                  inputs=[first_source, second_source])
        sanitized = sink._sanitize_inputs()
        self.assertEqual(
            sanitized,
            {
                GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
                GlobalStreamId("spout2", "default"): Grouping.SHUFFLE,
            })

        # list containing a spec whose name attribute is not set.
        nameless_source = HeronComponentSpec(name=None,
                                             python_class_path="sp_clspath",
                                             is_spout=True,
                                             par=1)
        sink = HeronComponentSpec(name="bolt",
                                  python_class_path="bl_clspath",
                                  is_spout=False,
                                  par=1,
                                  inputs=[nameless_source])
        self.assertRaises(RuntimeError, sink._sanitize_inputs)

        # list of GlobalStreamId: every entry gets SHUFFLE grouping.
        stream_ids = [
            GlobalStreamId("spout1", "default"),
            GlobalStreamId("spout2", "some_stream"),
        ]
        sink = HeronComponentSpec(name="bolt",
                                  python_class_path="bl_cls",
                                  is_spout=False,
                                  par=1,
                                  inputs=stream_ids)
        sanitized = sink._sanitize_inputs()
        self.assertEqual(
            sanitized,
            {stream_id: Grouping.SHUFFLE for stream_id in stream_ids})

        # list holding neither GlobalStreamId nor HeronComponentSpec.
        unsupported = [None, 123, "string", [GlobalStreamId("sp", "default")]]
        sink = HeronComponentSpec(name="bolt",
                                  python_class_path="bl_cls",
                                  is_spout=False,
                                  par=1,
                                  inputs=unsupported)
        self.assertRaises(ValueError, sink._sanitize_inputs)