class TestSane(Topology):
    # Topology-wide settings. "spout.overriden.config" is also set, with the
    # opposite value, in the spout's own config below.
    config = {
        'topology.wide.config.1': 'value',
        'spout.overriden.config': True,
    }

    # Unnamed spout emitting two fields on the default stream plus one
    # explicitly named stream.
    spout = HeronComponentSpec(
        name=None,
        python_class_path='sp_class',
        is_spout=True,
        par=3,
        inputs=None,
        outputs=[
            'word',
            'count',
            Stream(fields=['error_msg'], name='error_stream'),
        ],
        config={
            'spout.specific.config.1': 'value',
            'spout.specific.config.2': True,
            'spout.specific.config.3': -12.4,
            'spout.specific.config.4': [1, 2, 3],
            'spout.overriden.config': False,
        },
    )

    # Unnamed bolt reading the spout's default stream (SHUFFLE) and its
    # 'error_stream' (ALL).
    bolt = HeronComponentSpec(
        name=None,
        python_class_path='bl_class',
        is_spout=False,
        par=4,
        inputs={
            spout: Grouping.SHUFFLE,
            spout['error_stream']: Grouping.ALL,
        },
    )
def test_class_dict_to_specs(self):
    """Two specs that share one component name must raise ``ValueError``."""
    shared_name = "same_name"
    colliding = {
        "spout": HeronComponentSpec(shared_name, "sp_cls", True, 1),
        "bolt": HeronComponentSpec(shared_name, "bl_cls", False, 2),
    }
    self.assertRaises(ValueError, TopologyType.class_dict_to_specs, colliding)
def test_get_item(self):
    """Indexing a spec by stream name yields the matching ``GlobalStreamId``."""

    def mixed_outputs():
        # Build a fresh list on every call so no two specs share one object.
        return [
            "string",
            "hello",
            Stream(fields=["abc", "def"], name="another_stream"),
            Stream(fields=["another", "default"], name="default"),
        ]

    # Spec with a name: the stream id carries the component name.
    named = HeronComponentSpec("spout", "class", True, 1)
    named.outputs = mixed_outputs()
    self.assertEqual(named['another_stream'],
                     GlobalStreamId("spout", "another_stream"))

    # Spec without a name: the stream id carries the spec object itself.
    unnamed = HeronComponentSpec(None, "class", True, 1)
    unnamed.outputs = mixed_outputs()
    self.assertEqual(unnamed['default'], GlobalStreamId(unnamed, "default"))

    # Asking for a stream that was never declared is an error.
    orphan = HeronComponentSpec(None, "class", True, 1)
    orphan.outputs = mixed_outputs()
    with self.assertRaises(ValueError):
        _ = orphan['non_existent_stream']
def test_component_id_property(self):
    """``component_id`` resolves strings, named specs and unnamed specs."""
    # Plain string component id is returned unchanged.
    from_string = GlobalStreamId(componentId="component", streamId="stream")
    self.assertEqual(from_string.component_id, "component")

    # A named spec resolves to its name.
    named_spec = HeronComponentSpec("spout", "class", True, 1)
    from_named = GlobalStreamId(named_spec, "stream")
    self.assertEqual(from_named.component_id, "spout")

    # An unnamed spec resolves to a placeholder that embeds the spec's uuid,
    # i.e. "<No name available for HeronComponentSpec yet, uuid: %s>".
    unnamed_spec = HeronComponentSpec(None, "class", True, 1)
    from_unnamed = GlobalStreamId(unnamed_spec, "stream")
    self.assertIn(unnamed_spec.uuid, from_unnamed.component_id)
def spec(cls, name=None, inputs=None, par=1, config=None, optional_outputs=None):
    """Register this bolt to the topology and create ``HeronComponentSpec``

    This method takes an ``optional_outputs`` argument for supporting dynamic
    output fields declaration. However, it is recommended that ``outputs`` be
    declared as an attribute of your ``Bolt`` subclass. Also, some ways of
    declaring inputs are not supported in this implementation; please read
    the documentation below.

    :type name: str
    :param name: Name of this bolt.
    :type inputs: dict or list
    :param inputs: Streams that feed into this Bolt.

                   Four forms of this are acceptable:

                   1. A `dict` mapping from ``HeronComponentSpec`` to ``Grouping``.
                      In this case, default stream is used.
                   2. A `dict` mapping from ``GlobalStreamId`` to ``Grouping``.
                      This ``GlobalStreamId`` object itself is different from
                      StreamParse, because Heron does not use thrift, although
                      its constructor method is compatible.
                   3. A `list` of ``HeronComponentSpec``. In this case, default
                      stream with SHUFFLE grouping is used.
                   4. A `list` of ``GlobalStreamId``. In this case, SHUFFLE
                      grouping is used.
    :type par: int
    :param par: Parallelism hint for this bolt.
    :type config: dict
    :param config: Component-specific config settings.
    :type optional_outputs: list of (str or Stream) or tuple of (str or Stream)
    :param optional_outputs: Additional output fields for this bolt. These fields
                             are added to the existing ``outputs`` class attribute
                             of your bolt. This is an optional argument, and exists
                             only for supporting dynamic output field declaration.
    :rtype: HeronComponentSpec
    :returns: A bolt spec (``is_spout=False``) whose class path is
              ``<module>.<class name>`` of ``cls``.
    """
    python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

    if hasattr(cls, 'outputs'):
        # Shallow copy so that cls.outputs itself is never modified.
        _outputs = copy.copy(cls.outputs)
    else:
        _outputs = []

    if optional_outputs is not None:
        assert isinstance(optional_outputs, (list, tuple))
        for out in optional_outputs:
            assert isinstance(out, (str, Stream))
        # ``outputs`` may legitimately be declared as a tuple (or as None);
        # neither can be extended in place, so switch to a list first.
        if _outputs is None:
            _outputs = []
        elif isinstance(_outputs, tuple):
            _outputs = list(_outputs)
        _outputs.extend(optional_outputs)

    return HeronComponentSpec(name, python_class_path, is_spout=False, par=par,
                              inputs=inputs, outputs=_outputs, config=config)
def test_add_spec(self):
    """``add_spec`` rejects unnamed specs and non-specs, accepts many specs."""
    topology_builder = TopologyBuilder("Test")

    # A spec without a name is refused.
    with self.assertRaises(ValueError):
        topology_builder.add_spec(HeronComponentSpec(None, "path", True, 1))

    # Something that is not a spec at all is refused as well.
    self.assertRaises(TypeError, topology_builder.add_spec, None)

    # Neither failed call may have registered anything.
    self.assertEqual(len(topology_builder._specs), 0)

    # Ten uniquely named specs, registered in a single variadic call.
    named_specs = [
        HeronComponentSpec(str(index), "path", True, 1) for index in range(10)
    ]
    topology_builder.add_spec(*named_specs)
    self.assertEqual(len(topology_builder._specs), 10)
def spec(cls, name=None, par=1, config=None, optional_outputs=None):
    """Register this spout to the topology and create ``HeronComponentSpec``

    The usage of this method is compatible with StreamParse API, although it
    does not create ``ShellBoltSpec`` but instead directly registers to a
    ``Topology`` class.

    This method takes an ``optional_outputs`` argument for supporting dynamic
    output fields declaration. However, it is recommended that ``outputs`` be
    declared as an attribute of your ``Spout`` subclass. A spout takes no
    inputs; the returned spec is always created with ``inputs=None``.

    :type name: str
    :param name: Name of this spout.
    :type par: int
    :param par: Parallelism hint for this spout.
    :type config: dict
    :param config: Component-specific config settings.
    :type optional_outputs: list of (str or Stream) or tuple of (str or Stream)
    :param optional_outputs: Additional output fields for this spout. These fields
                             are added to the existing ``outputs`` class attribute
                             of your spout. This is an optional argument, and exists
                             only for supporting dynamic output field declaration.
    :rtype: HeronComponentSpec
    :returns: A spout spec (``is_spout=True``) whose class path is
              ``<module>.<class name>`` of ``cls``.
    """
    python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

    if hasattr(cls, 'outputs'):
        # Shallow copy so that cls.outputs itself is never modified.
        _outputs = copy.copy(cls.outputs)
    else:
        _outputs = []

    if optional_outputs is not None:
        assert isinstance(optional_outputs, (list, tuple))
        for out in optional_outputs:
            assert isinstance(out, (str, Stream))
        # ``outputs`` may legitimately be declared as a tuple (or as None);
        # neither can be extended in place, so switch to a list first.
        if _outputs is None:
            _outputs = []
        elif isinstance(_outputs, tuple):
            _outputs = list(_outputs)
        _outputs.extend(optional_outputs)

    return HeronComponentSpec(name, python_class_path, is_spout=True, par=par,
                              inputs=None, outputs=_outputs, config=config)
def spec(cls, name, par, config, user_spout_classpath, user_output_fields=None):
    """Create a spout ``HeronComponentSpec`` that records a user spout class path.

    The returned spec points at ``cls`` itself (``<module>.<class name>``) and
    carries ``user_spout_classpath`` in its config under
    ``integ_const.USER_SPOUT_CLASSPATH``.

    :type name: str
    :param name: Name of this spout.
    :type par: int
    :param par: Parallelism hint for this spout.
    :type config: dict or None
    :param config: Component-specific config settings. The dict passed in is
                   not modified; ``None`` is treated as an empty config.
    :param user_spout_classpath: Value stored under
                                 ``integ_const.USER_SPOUT_CLASSPATH``.
    :type user_output_fields: list or tuple
    :param user_output_fields: Extra output fields appended after the entries
                               of ``cls.outputs``.
    :rtype: HeronComponentSpec
    :returns: A spout spec (``is_spout=True``, ``inputs=None``).
    """
    python_class_path = "%s.%s" % (cls.__module__, cls.__name__)

    # Work on a private copy: writing the class path into the caller's dict
    # would leak this key into every other place that dict is reused.
    _config = {} if config is None else copy.copy(config)
    _config[integ_const.USER_SPOUT_CLASSPATH] = user_spout_classpath

    # Shallow copy so that cls.outputs itself is never modified.
    _outputs = copy.copy(cls.outputs)
    if user_output_fields is not None:
        # A tuple declaration cannot be extended in place.
        if isinstance(_outputs, tuple):
            _outputs = list(_outputs)
        _outputs.extend(user_output_fields)

    return HeronComponentSpec(name, python_class_path, is_spout=True, par=par,
                              inputs=None, outputs=_outputs, config=_config)
def test_add_bolt_specs(self):
    """A bolt spec constructed without ``inputs`` must raise ``ValueError``."""
    inputless_bolt = HeronComponentSpec(
        name="bolt", python_class_path="bl_cls", is_spout=False, par=1)
    self.assertRaises(
        ValueError, TopologyType.add_bolt_specs, inputless_bolt, {})
def test_add_spout_specs(self):
    """A spout spec constructed without ``outputs`` must raise ``ValueError``."""
    outputless_spout = HeronComponentSpec(
        name="spout", python_class_path="sp_cls", is_spout=True, par=1)
    self.assertRaises(
        ValueError, TopologyType.add_spout_specs, outputless_spout, {})
class JustBolt(Topology):
    # Topology whose only component is an unnamed bolt with no inputs.
    bolt = HeronComponentSpec(
        name=None,
        python_class_path="bl_class",
        is_spout=False,
        par=4,
    )
def test_sanitize_config(self):
    """Exercise ``HeronComponentSpec._sanitize_config`` on valid and invalid input."""
    sanitize = HeronComponentSpec._sanitize_config

    # An empty dict comes back as an empty dict.
    self.assertEqual(sanitize({}), {})

    # Anything that is not a dict is rejected.
    for not_a_dict in ("{key: value}", True, None):
        with self.assertRaises(TypeError):
            sanitize(not_a_dict)

    # Non-string keys are rejected. The keys used here must be hashable:
    # a list key would raise TypeError while the dict literal is being
    # built, before ``_sanitize_config`` is ever called.
    for bad_key in (('k', 'e', 'y'), None):
        with self.assertRaises(TypeError):
            sanitize({bad_key: "value"})

    # Booleans, ints and floats are converted to their string form.
    conversions = (
        (True, "true"),
        (False, "false"),
        (10, "10"),
        (-2400000, "-2400000"),
        (0.0000001, "1e-07"),
        (-15.33333, "-15.33333"),
    )
    for raw_value, expected in conversions:
        self.assertEqual(sanitize({"key": raw_value})["key"], expected)

    # Values of any other type are passed through as they are.
    passthrough = sanitize({"key": ['v', 'a', 'l', 'u', 'e']})
    self.assertEqual(passthrough["key"], ['v', 'a', 'l', 'u', 'e'])
    passthrough = sanitize({"key": None})
    self.assertEqual(passthrough["key"], None)
def test_get_out_streamids(self):
    """``get_out_streamids`` returns the set of declared output stream names."""
    # No outputs assigned: empty set.
    bare = HeronComponentSpec("spout", "class", True, 1)
    self.assertEqual(bare.get_out_streamids(), set())

    # Outputs that are neither a list nor a tuple are rejected.
    malformed = HeronComponentSpec("spout", "class", True, 1)
    malformed.outputs = "string"
    self.assertRaises(TypeError, malformed.get_out_streamids)

    # Plain field names count towards "default"; named streams keep their name.
    populated = HeronComponentSpec("spout", "class", True, 1)
    populated.outputs = [
        "string",
        "hello",
        Stream(fields=["abc", "def"], name="another_stream"),
        Stream(fields=["another", "default"], name="default"),
    ]
    self.assertEqual(populated.get_out_streamids(),
                     {"default", "another_stream"})
def test_sanitize_args(self):
    """Constructor accepts well-formed arguments and asserts on bad ones."""
    # Well-formed arguments, with and without a name.
    well_formed = HeronComponentSpec(
        name="string", python_class_path="string.path", is_spout=True, par=1)
    self.assertIsNotNone(well_formed)

    nameless = HeronComponentSpec(
        name=None, python_class_path="string.path", is_spout=True, par=1)
    self.assertIsNotNone(nameless)

    # Each tuple is (name, python_class_path, is_spout, par) with exactly
    # one offending position.
    rejected = (
        # bad name
        (123, "classpath", True, 1),
        (False, "classpath", True, 1),
        # bad classpath
        ("name", {}, True, 1),
        ("name", None, True, 1),
        # bad is_spout
        ("name", "classpath", 1, 1),
        ("name", "classpath", None, 1),
        # bad par
        ("name", "classpath", True, "1"),
        ("name", "classpath", True, 1.35),
        ("name", "classpath", True, -21),
        ("name", "classpath", True, None),
    )
    for positional in rejected:
        with self.assertRaises(AssertionError):
            HeronComponentSpec(*positional)
def test_sanitize_outputs(self):
    """``_sanitize_outputs`` groups declared fields by stream name."""

    def spout_with(outputs):
        # Outputs are assigned after construction, as a separate step.
        candidate = HeronComponentSpec("spout", "class", True, 1)
        candidate.outputs = outputs
        return candidate

    # Nothing passed for outputs: result is None.
    untouched = HeronComponentSpec("spout", "class", True, 1)
    self.assertIsNone(untouched._sanitize_outputs())

    # Rejected: a non-list/tuple container, then a list holding entries
    # that are neither str nor Stream.
    for bad_outputs in ("string", ["string", False, 123]):
        broken = spout_with(bad_outputs)
        with self.assertRaises(TypeError):
            broken._sanitize_outputs()

    # Only plain field names: everything lands on the default stream.
    strings_only = spout_with(["string", "hello", "heron"])
    self.assertEqual(strings_only._sanitize_outputs(),
                     {"default": ["string", "hello", "heron"]})

    # Plain names mixed with Stream objects: a Stream named "default" is
    # merged with the plain names, other streams are kept apart.
    mixed = spout_with([
        "string",
        "hello",
        Stream(fields=["abc", "def"], name="another_stream"),
        Stream(fields=["another", "default"], name="default"),
    ])
    expected = {
        "default": ["string", "hello", "another", "default"],
        "another_stream": ["abc", "def"],
    }
    self.assertEqual(mixed._sanitize_outputs(), expected)
def test_sanitize_inputs(self):
    """``_sanitize_inputs`` normalises every accepted input form.

    Note that ``_sanitize_inputs()`` should only be called after the
    ``HeronComponentSpec``'s name attribute is set.
    """
    # Invalid inputs (valid ones are either dict, list, tuple or None).
    for bogus_inputs in ("string", 100):
        invalid = HeronComponentSpec("name", "classpath", True, 1,
                                     inputs=bogus_inputs)
        with self.assertRaises(TypeError):
            invalid._sanitize_inputs()

    # dict <HeronComponentSpec -> Grouping>: the default stream is used.
    source = HeronComponentSpec("spout", "sp_clspath", True, 1)
    sink = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                              inputs={source: Grouping.SHUFFLE})
    self.assertEqual(sink._sanitize_inputs(),
                     {GlobalStreamId("spout", "default"): Grouping.SHUFFLE})

    # dict keyed by a stream obtained through spec['stream name'].
    source = HeronComponentSpec("spout", "sp_clspath", True, 1)
    source.outputs = [Stream(name='another_stream')]
    sink = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                              inputs={source['another_stream']: Grouping.ALL})
    self.assertEqual(sink._sanitize_inputs(),
                     {GlobalStreamId("spout", "another_stream"): Grouping.ALL})

    # dict keyed by a spec whose name attribute is not set.
    nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
    sink = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                              inputs={nameless: Grouping.ALL})
    with self.assertRaises(RuntimeError):
        sink._sanitize_inputs()

    # dict <GlobalStreamId -> Grouping>: returned as given.
    by_stream_id = {
        GlobalStreamId("some_spout", "some_stream"): Grouping.NONE,
        GlobalStreamId("another_spout", "default"):
            Grouping.fields(['word', 'count']),
    }
    sink = HeronComponentSpec("bolt", "classpath", False, 1,
                              inputs=by_stream_id)
    self.assertEqual(sink._sanitize_inputs(), by_stream_id)

    # list of HeronComponentSpec: default stream with SHUFFLE grouping.
    first_source = HeronComponentSpec("spout1", "sp1_cls", True, 1)
    second_source = HeronComponentSpec("spout2", "sp2_cls", True, 1)
    sink = HeronComponentSpec("bolt", "bl_cls", False, 1,
                              inputs=[first_source, second_source])
    expected = {
        GlobalStreamId("spout1", "default"): Grouping.SHUFFLE,
        GlobalStreamId("spout2", "default"): Grouping.SHUFFLE,
    }
    self.assertEqual(sink._sanitize_inputs(), expected)

    # list containing a spec whose name attribute is not set.
    nameless = HeronComponentSpec(None, "sp_clspath", True, 1)
    sink = HeronComponentSpec("bolt", "bl_clspath", False, 1,
                              inputs=[nameless])
    with self.assertRaises(RuntimeError):
        sink._sanitize_inputs()

    # list of GlobalStreamId: SHUFFLE grouping for each entry.
    stream_ids = [
        GlobalStreamId("spout1", "default"),
        GlobalStreamId("spout2", "some_stream"),
    ]
    sink = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=stream_ids)
    self.assertEqual(sink._sanitize_inputs(),
                     {stream_id: Grouping.SHUFFLE for stream_id in stream_ids})

    # list of neither GlobalStreamId nor HeronComponentSpec.
    junk = [None, 123, "string", [GlobalStreamId("sp", "default")]]
    sink = HeronComponentSpec("bolt", "bl_cls", False, 1, inputs=junk)
    with self.assertRaises(ValueError):
        sink._sanitize_inputs()