def extract_physical_plan(self, topology):
    """
    Returns the representation of physical plan that will
    be returned from Tracker.

    The result is a dict with the keys "instances", "instance_groups",
    "stmgrs", "spouts", "bolts", "config" and "components". When the
    topology has no physical plan every one of them maps to an empty dict.

    Raises KeyError when an instance refers to a stream manager id that is
    not part of the plan, or to a component name that is neither a listed
    spout nor a listed bolt.
    """
    physicalPlan = {
        "instances": {},
        "instance_groups": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
        "components": {}
    }

    plan = topology.physical_plan
    if not plan:
        return physicalPlan

    spouts = topology.spouts()
    bolts = topology.bolts()

    # Configs
    topology_config = plan.topology.topology_config
    if topology_config:
        physicalPlan["config"] = convert_pb_kvs(topology_config.kvs)

    # Spouts are registered before bolts, so when a name shows up in both
    # lists the spout's config is the one kept under "components".
    components = physicalPlan["components"]
    for kind, declared in (("spouts", spouts), ("bolts", bolts)):
        for component in declared:
            component_name = component.comp.name
            physicalPlan[kind][component_name] = []
            if component_name not in components:
                components[component_name] = {
                    "config": convert_pb_kvs(component.comp.config.kvs)
                }

    stmgrs = physicalPlan["stmgrs"]
    for stmgr in plan.stmgrs:
        host = stmgr.host_name
        cwd = stmgr.cwd
        # An unset shell_port is reported as None rather than the field default.
        shell_port = stmgr.shell_port if stmgr.HasField("shell_port") else None
        stmgrs[stmgr.id] = {
            "id": stmgr.id,
            "host": host,
            "port": stmgr.data_port,
            "shell_port": shell_port,
            "cwd": cwd,
            "pid": stmgr.pid,
            "joburl": utils.make_shell_job_url(host, shell_port, cwd),
            "logfiles": utils.make_shell_logfiles_url(host, shell_port, cwd),
            "instance_ids": []
        }

    instance_groups = collections.OrderedDict()
    for instance in plan.instances:
        instance_id = instance.instance_id
        stmgrId = instance.stmgr_id
        name = instance.info.component_name
        stmgrInfo = stmgrs[stmgrId]

        # instance_id format container_<index>_component_1
        # group name is container_<index>
        group_name = instance_id.rsplit("_", 2)[0]
        instance_groups.setdefault(group_name, []).append(instance_id)

        physicalPlan["instances"][instance_id] = {
            "id": instance_id,
            "name": name,
            "stmgrId": stmgrId,
            "logfile": utils.make_shell_logfiles_url(
                stmgrInfo["host"],
                stmgrInfo["shell_port"],
                stmgrInfo["cwd"],
                instance_id,
            ),
        }
        stmgrInfo["instance_ids"].append(instance_id)

        # Anything that is not a known spout is filed under bolts.
        if name in physicalPlan["spouts"]:
            physicalPlan["spouts"][name].append(instance_id)
        else:
            physicalPlan["bolts"][name].append(instance_id)

    physicalPlan["instance_groups"] = instance_groups
    return physicalPlan
def extract_physical_plan(self, topology):
    """
    Builds the dict describing a topology's physical plan, as it is
    returned from Tracker.

    Keys of the result: "instances", "instance_groups", "stmgrs",
    "spouts", "bolts", "config", "components". Without a physical plan
    all of them are empty dicts.
    """
    result = {
        "instances": {},
        "instance_groups": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
        "components": {}
    }

    # Nothing to describe until the topology has a physical plan.
    if not topology.physical_plan:
        return result

    spout_defs = topology.spouts()
    bolt_defs = topology.bolts()

    # Physical Plan
    pplan = topology.physical_plan
    stmgr_protos = list(pplan.stmgrs)
    instance_protos = list(pplan.instances)

    # Configs
    topology_config = pplan.topology.topology_config
    if topology_config:
        result["config"] = convert_pb_kvs(topology_config.kvs)

    component_configs = result["components"]
    instances_by_spout = result["spouts"]
    instances_by_bolt = result["bolts"]
    stmgr_entries = result["stmgrs"]
    instance_entries = result["instances"]

    # The first definition seen for a component name supplies its config.
    for spout in spout_defs:
        key = spout.comp.name
        instances_by_spout[key] = []
        if key in component_configs:
            continue
        component_configs[key] = {
            "config": convert_pb_kvs(spout.comp.config.kvs)
        }

    for bolt in bolt_defs:
        key = bolt.comp.name
        instances_by_bolt[key] = []
        if key in component_configs:
            continue
        component_configs[key] = {
            "config": convert_pb_kvs(bolt.comp.config.kvs)
        }

    for proto in stmgr_protos:
        stmgr_host = proto.host_name
        stmgr_cwd = proto.cwd
        # None stands in for a shell_port that was never set.
        if proto.HasField("shell_port"):
            stmgr_shell_port = proto.shell_port
        else:
            stmgr_shell_port = None
        stmgr_entries[proto.id] = {
            "id": proto.id,
            "host": stmgr_host,
            "port": proto.data_port,
            "shell_port": stmgr_shell_port,
            "cwd": stmgr_cwd,
            "pid": proto.pid,
            "joburl": utils.make_shell_job_url(
                stmgr_host, stmgr_shell_port, stmgr_cwd),
            "logfiles": utils.make_shell_logfiles_url(
                stmgr_host, stmgr_shell_port, stmgr_cwd),
            "instance_ids": []
        }

    groups = collections.OrderedDict()
    for proto in instance_protos:
        iid = proto.instance_id
        owner_id = proto.stmgr_id
        component = proto.info.component_name
        owner = stmgr_entries[owner_id]
        owner_host = owner["host"]
        owner_cwd = owner["cwd"]
        owner_shell_port = owner["shell_port"]

        # An id such as container_<index>_component_1 belongs to the
        # group container_<index>: drop the last two "_" separated parts.
        group_key = iid.rsplit("_", 2)[0]
        if group_key not in groups:
            groups[group_key] = []
        groups[group_key].append(iid)

        instance_entries[iid] = {
            "id": iid,
            "name": component,
            "stmgrId": owner_id,
            "logfile": utils.make_shell_logfiles_url(
                owner_host, owner_shell_port, owner_cwd, proto.instance_id),
        }
        owner["instance_ids"].append(iid)

        # Components that are not known spouts are treated as bolts.
        if component in instances_by_spout:
            instances_by_spout[component].append(iid)
        else:
            instances_by_bolt[component].append(iid)

    result["instance_groups"] = groups
    return result
def extract_physical_plan(self, topology):
    """
    Builds the dict describing a topology's physical plan, as it is
    returned from Tracker.

    Keys of the result: "instances", "instance_groups", "stmgrs",
    "spouts", "bolts", "config". Without a physical plan all of them are
    empty dicts.
    """
    result = {
        "instances": {},
        "instance_groups": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }

    if not topology.physical_plan:
        return result

    spout_defs = topology.spouts()
    bolt_defs = topology.bolts()

    # Physical Plan
    pplan = topology.physical_plan
    stmgr_protos = list(pplan.stmgrs)
    instance_protos = list(pplan.instances)

    config = result["config"]
    instances_by_spout = result["spouts"]
    instances_by_bolt = result["bolts"]
    stmgr_entries = result["stmgrs"]
    instance_entries = result["instances"]

    # Configs
    topology_config = pplan.topology.topology_config
    if topology_config:
        for kv in topology_config.kvs:
            if kv.value:
                config[kv.key] = kv.value
                continue
            if not kv.serialized_value:
                continue
            # currently assumes that serialized_value is Java serialization
            # when multi-language support is added later, ConfigValueType
            # should be checked
            try:
                java_obj = javaobj.loads(kv.serialized_value)
                config[kv.key] = {
                    'value': json.dumps(
                        java_obj,
                        default=lambda custom_field: custom_field.__dict__,
                        sort_keys=True,
                        indent=2),
                    # Hexadecimal byte array for Serialized objects
                    'raw': utils.hex_escape(kv.serialized_value),
                }
            except Exception:
                Log.exception("Failed to parse data as java object")
                config[kv.key] = {
                    # The value should be a valid json object
                    'value': '{}',
                    'raw': utils.hex_escape(kv.serialized_value),
                }

    for spout in spout_defs:
        instances_by_spout[spout.comp.name] = []

    for bolt in bolt_defs:
        instances_by_bolt[bolt.comp.name] = []

    for proto in stmgr_protos:
        stmgr_host = proto.host_name
        stmgr_cwd = proto.cwd
        # None stands in for a shell_port that was never set.
        if proto.HasField("shell_port"):
            stmgr_shell_port = proto.shell_port
        else:
            stmgr_shell_port = None
        stmgr_entries[proto.id] = {
            "id": proto.id,
            "host": stmgr_host,
            "port": proto.data_port,
            "shell_port": stmgr_shell_port,
            "cwd": stmgr_cwd,
            "pid": proto.pid,
            "joburl": utils.make_shell_job_url(
                stmgr_host, stmgr_shell_port, stmgr_cwd),
            "logfiles": utils.make_shell_logfiles_url(
                stmgr_host, stmgr_shell_port, stmgr_cwd),
            "instance_ids": []
        }

    groups = collections.OrderedDict()
    for proto in instance_protos:
        iid = proto.instance_id
        owner_id = proto.stmgr_id
        component = proto.info.component_name
        owner = stmgr_entries[owner_id]
        owner_host = owner["host"]
        owner_cwd = owner["cwd"]
        owner_shell_port = owner["shell_port"]

        # NOTE(review): the group name comes from component_index + 1, not
        # from the instance id -- confirm this matches container numbering.
        container_number = int(proto.info.component_index) + 1
        group_key = "container_%d" % container_number
        if group_key not in groups:
            groups[group_key] = []
        groups[group_key].append(iid)

        instance_entries[iid] = {
            "id": iid,
            "name": component,
            "stmgrId": owner_id,
            "logfile": utils.make_shell_logfiles_url(
                owner_host, owner_shell_port, owner_cwd, proto.instance_id),
        }
        owner["instance_ids"].append(iid)

        # Components that are not known spouts are treated as bolts.
        if component in instances_by_spout:
            instances_by_spout[component].append(iid)
        else:
            instances_by_bolt[component].append(iid)

    result["instance_groups"] = groups
    return result
def _build_physical_plan(physical_plan) -> TopologyInfoPhysicalPlan:
    """Build the TopologyInfoPhysicalPlan for a physical plan.

    A falsy ``physical_plan`` yields a plan whose fields are all empty
    dicts. Otherwise the result carries the topology config, one
    PhysicalPlanComponent per spout/bolt name, one PhysicalPlanStmgr per
    stream manager, one PhysicalPlanInstance per instance, and the
    instance ids grouped per spout, per bolt, per stream manager and per
    instance group.

    Raises KeyError when an instance names a stream manager id that is not
    in ``physical_plan.stmgrs``, or a component that is neither a listed
    spout nor a listed bolt.
    """
    if not physical_plan:
        return TopologyInfoPhysicalPlan(
            instances={},
            instance_groups={},
            stmgrs={},
            spouts={},
            bolts={},
            config={},
            components={},
        )

    config = {}
    if physical_plan.topology.topology_config:
        config = utils.convert_pb_kvs(physical_plan.topology.topology_config.kvs)

    components = {}
    spouts = {}
    bolts = {}
    # Spouts are visited first, so a name shared with a bolt keeps the
    # spout's config in ``components``.
    for spout in physical_plan.topology.spouts:
        name = spout.comp.name
        spouts[name] = []
        if name not in components:
            components[name] = PhysicalPlanComponent(
                config=utils.convert_pb_kvs(spout.comp.config.kvs),
            )
    for bolt in physical_plan.topology.bolts:
        name = bolt.comp.name
        bolts[name] = []
        if name not in components:
            components[name] = PhysicalPlanComponent(
                config=utils.convert_pb_kvs(bolt.comp.config.kvs),
            )

    stmgrs = {}
    for stmgr in physical_plan.stmgrs:
        # An unset shell_port is represented as None. The same normalised
        # value is passed to both URL helpers so they cannot disagree about
        # whether a shell port exists.
        shell_port = stmgr.shell_port if stmgr.HasField("shell_port") else None
        stmgrs[stmgr.id] = PhysicalPlanStmgr(
            id=stmgr.id,
            host=stmgr.host_name,
            port=stmgr.data_port,
            shell_port=shell_port,
            cwd=stmgr.cwd,
            pid=stmgr.pid,
            joburl=utils.make_shell_job_url(stmgr.host_name, shell_port, stmgr.cwd),
            logfiles=utils.make_shell_logfiles_url(stmgr.host_name, shell_port, stmgr.cwd),
            instance_ids=[],
        )

    instances = {}
    instance_groups = {}
    for instance in physical_plan.instances:
        component_name = instance.info.component_name
        instance_id = instance.instance_id
        # Anything that is not a known spout is filed under bolts.
        if component_name in spouts:
            spouts[component_name].append(instance_id)
        else:
            bolts[component_name].append(instance_id)

        stmgr = stmgrs[instance.stmgr_id]
        stmgr.instance_ids.append(instance_id)
        instances[instance_id] = PhysicalPlanInstance(
            id=instance_id,
            name=component_name,
            stmgr_id=instance.stmgr_id,
            logfile=utils.make_shell_logfiles_url(
                stmgr.host,
                stmgr.shell_port,
                stmgr.cwd,
                instance_id,
            ),
        )

        # instance_id example: container_1_component_1
        # group name would be: container_1
        group_name = instance_id.rsplit("_", 2)[0]
        instance_groups.setdefault(group_name, []).append(instance_id)

    return TopologyInfoPhysicalPlan(
        instances=instances,
        instance_groups=instance_groups,
        stmgrs=stmgrs,
        spouts=spouts,
        bolts=bolts,
        components=components,
        config=config,
    )
def extract_physical_plan(self, topology):
    """
    Assembles the physical plan dict that Tracker returns for a topology.

    Keys of the result: "instances", "stmgrs", "spouts", "bolts",
    "config". Without a physical plan all of them are empty dicts.
    """
    plan_info = {
        "instances": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }

    if not topology.physical_plan:
        return plan_info

    declared_spouts = topology.spouts()
    declared_bolts = topology.bolts()

    # Physical Plan
    pplan = topology.physical_plan
    all_stmgrs = list(pplan.stmgrs)
    all_instances = list(pplan.instances)

    config_out = plan_info["config"]
    spout_out = plan_info["spouts"]
    bolt_out = plan_info["bolts"]
    stmgr_out = plan_info["stmgrs"]
    instance_out = plan_info["instances"]

    # Configs
    topology_config = pplan.topology.topology_config
    if topology_config:
        for entry in topology_config.kvs:
            if entry.value:
                config_out[entry.key] = entry.value
            elif entry.serialized_value:
                # currently assumes that serialized_value is Java
                # serialization; when multi-language support is added
                # later, ConfigValueType should be checked
                try:
                    decoded = javaobj.loads(entry.serialized_value)
                    config_out[entry.key] = {
                        'value': json.dumps(
                            decoded,
                            default=lambda custom_field: custom_field.__dict__,
                            sort_keys=True,
                            indent=2),
                        # Hexadecimal byte array for Serialized objects
                        'raw': utils.hex_escape(entry.serialized_value),
                    }
                except Exception:
                    # Decoding failures are not logged here; a placeholder
                    # value is stored next to the raw bytes instead.
                    config_out[entry.key] = {
                        'value': 'A Java Object',
                        'raw': utils.hex_escape(entry.serialized_value),
                    }

    for spout in declared_spouts:
        spout_out[spout.comp.name] = []

    for bolt in declared_bolts:
        bolt_out[bolt.comp.name] = []

    for sm in all_stmgrs:
        sm_host = sm.host_name
        sm_cwd = sm.cwd
        # None stands in for a shell_port that was never set.
        sm_shell_port = None
        if sm.HasField("shell_port"):
            sm_shell_port = sm.shell_port
        stmgr_out[sm.id] = {
            "id": sm.id,
            "host": sm_host,
            "port": sm.data_port,
            "shell_port": sm_shell_port,
            "cwd": sm_cwd,
            "pid": sm.pid,
            "joburl": utils.make_shell_job_url(sm_host, sm_shell_port, sm_cwd),
            "logfiles": utils.make_shell_logfiles_url(
                sm_host, sm_shell_port, sm_cwd),
            "instance_ids": []
        }

    for inst in all_instances:
        inst_id = inst.instance_id
        sm_id = inst.stmgr_id
        comp_name = inst.info.component_name
        sm_info = stmgr_out[sm_id]
        sm_host = sm_info["host"]
        sm_cwd = sm_info["cwd"]
        sm_shell_port = sm_info["shell_port"]

        instance_out[inst_id] = {
            "id": inst_id,
            "name": comp_name,
            "stmgrId": sm_id,
            "logfile": utils.make_shell_logfiles_url(
                sm_host, sm_shell_port, sm_cwd, inst.instance_id),
        }
        sm_info["instance_ids"].append(inst_id)

        # Components that are not known spouts are treated as bolts.
        target = spout_out if comp_name in spout_out else bolt_out
        target[comp_name].append(inst_id)

    return plan_info