Exemplo n.º 1
0
  def extract_logical_plan(self, topology):
    """
    Build the logical plan dictionary that is returned from Tracker.

    The result has two keys, "spouts" and "bolts", each mapping a
    component name to that component's converted config and output
    streams. Spout entries also carry "type", "source" and "version";
    bolt entries also carry their "inputs".
    """
    spout_plans = {}
    bolt_plans = {}

    # Spouts: type, source and version are read from well-known config
    # keys and keep their defaults when a key is not present.
    for spout in topology.spouts():
      name = spout.comp.name
      config_kvs = spout.comp.config.kvs
      spout_type, spout_source, spout_version = "default", "NA", "NA"
      for entry in config_kvs:
        key = entry.key
        if key == "spout.type":
          spout_type = javaobj.loads(entry.serialized_value)
        elif key == "spout.source":
          spout_source = javaobj.loads(entry.serialized_value)
        elif key == "spout.version":
          spout_version = javaobj.loads(entry.serialized_value)

      spout_plans[name] = {
          "config": convert_pb_kvs(config_kvs, include_non_primitives=False),
          "type": spout_type,
          "source": spout_source,
          "version": spout_version,
          "outputs": [{"stream_name": out.stream.id} for out in spout.outputs],
      }

    # Bolts: besides outputs, each input stream records the component it
    # comes from and the name of its grouping enum value.
    for bolt in topology.bolts():
      name = bolt.comp.name
      bolt_plans[name] = {
          "config": convert_pb_kvs(bolt.comp.config.kvs, include_non_primitives=False),
          "outputs": [{"stream_name": out.stream.id} for out in bolt.outputs],
          "inputs": [
              {
                  "stream_name": inp.stream.id,
                  "component_name": inp.stream.component_name,
                  "grouping": topology_pb2.Grouping.Name(inp.gtype),
              }
              for inp in bolt.inputs
          ],
      }

    return {
        "spouts": spout_plans,
        "bolts": bolt_plans,
    }
Exemplo n.º 2
0
  def extract_logical_plan(self, topology):
    """
    Assemble the logical plan representation that is returned from Tracker.

    Returns a dict with "spouts" and "bolts" sub-dicts keyed by component
    name. Every component lists its converted config and output streams;
    spouts add type/source/version metadata and bolts add input streams.
    """
    plan = {"spouts": {}, "bolts": {}}

    # Add spouts.
    for spout in topology.spouts():
      spout_name = spout.comp.name
      kvs_list = spout.comp.config.kvs

      # Metadata defaults, overridden by the matching config keys if any.
      meta = {
          "spout.type": "default",
          "spout.source": "NA",
          "spout.version": "NA",
      }
      for kv in kvs_list:
        if kv.key in meta:
          meta[kv.key] = javaobj.loads(kv.serialized_value)

      spout_entry = {
          "config": convert_pb_kvs(kvs_list, include_non_primitives=False),
          "type": meta["spout.type"],
          "source": meta["spout.source"],
          "version": meta["spout.version"],
          "outputs": [],
      }
      spout_entry["outputs"].extend(
          {"stream_name": stream.stream.id} for stream in spout.outputs)
      plan["spouts"][spout_name] = spout_entry

    # Add bolts.
    for bolt in topology.bolts():
      bolt_name = bolt.comp.name
      bolt_entry = {
          "config": convert_pb_kvs(bolt.comp.config.kvs, include_non_primitives=False),
          "outputs": [],
          "inputs": [],
      }
      bolt_entry["outputs"].extend(
          {"stream_name": stream.stream.id} for stream in bolt.outputs)
      for stream in bolt.inputs:
        source = stream.stream
        bolt_entry["inputs"].append({
            "stream_name": source.id,
            "component_name": source.component_name,
            "grouping": topology_pb2.Grouping.Name(stream.gtype),
        })
      plan["bolts"][bolt_name] = bolt_entry

    return plan
Exemplo n.º 3
0
def _convert_java_value(kv, include_non_primitives=True):
    """Decode the Java-serialized value of a config entry.

    Strings are returned as-is and primitives are unwrapped to their
    ``value``. Any other object is returned as a dict holding a JSON dump
    ("value") and the hex-escaped serialized bytes ("raw"), but only when
    ``include_non_primitives`` is true; otherwise ``None`` is returned.

    If anything fails while decoding, the failure is logged and the
    result is ``_raw_value(kv)`` when ``include_non_primitives`` is true,
    else ``None``.
    """
    try:
        decoded = javaobj.loads(kv.serialized_value)

        if pyutils.is_str_instance(decoded):
            return decoded
        if decoded.is_primitive():
            return decoded.value
        if not include_non_primitives:
            return None

        # Non-string java objects are reported both as a JSON dump of
        # their fields and as the hex-escaped serialized byte array.
        as_json = json.dumps(
            decoded,
            default=lambda custom_field: custom_field.__dict__,
            sort_keys=True,
            indent=2,
        )
        return {'value': as_json, 'raw': utils.hex_escape(kv.serialized_value)}
    except Exception:
        Log.exception("Failed to parse data as java object")
        return _raw_value(kv) if include_non_primitives else None
Exemplo n.º 4
0
def _convert_java_value(kv, include_non_primitives=True):
  """Turn a Java-serialized config value into a plain Python value.

  A decoded string is returned unchanged and a primitive is returned as
  its ``value``. For any other object the result is a dict with a JSON
  dump under 'value' and the hex-escaped serialized bytes under 'raw'
  when ``include_non_primitives`` is true, and ``None`` when it is false.

  Decoding errors are logged; the fallback is then ``_raw_value(kv)``
  or ``None``, again depending on ``include_non_primitives``.
  """
  try:
    java_obj = javaobj.loads(kv.serialized_value)

    is_string = pyutils.is_str_instance(java_obj)
    if is_string:
      return java_obj
    elif java_obj.is_primitive():
      return java_obj.value
    elif include_non_primitives:
      # Non-string java objects yield both a readable JSON dump and the
      # hexadecimal byte array of the serialized object.
      result = {}
      result['value'] = json.dumps(java_obj,
                                   default=lambda custom_field: custom_field.__dict__,
                                   sort_keys=True,
                                   indent=2)
      result['raw'] = utils.hex_escape(kv.serialized_value)
      return result
    else:
      return None
  except Exception:
    Log.exception("Failed to parse data as java object")
    if not include_non_primitives:
      return None
    return _raw_value(kv)
Exemplo n.º 5
0
    def extract_logical_plan(self, topology):
        """
        Build the logical plan representation that is returned from Tracker.

        The result maps "spouts" and "bolts" to per-component dicts keyed
        by component name. Spout entries hold the converted config, the
        type/source/version metadata, the output streams and any extra
        links found in the config; each extra link gets its URL filled in
        from the topology's execution state. Bolt entries hold the
        converted config plus output and input streams.
        """
        plan = {"spouts": {}, "bolts": {}}

        # Add spouts.
        for spout in topology.spouts():
            spout_name = spout.comp.name
            config_kvs = spout.comp.config.kvs

            # Metadata defaults, replaced by matching config keys if any.
            meta = {
                "spout.type": "default",
                "spout.source": "NA",
                "spout.version": "NA",
            }
            extra_links = []
            for kv in config_kvs:
                if kv.key in meta:
                    meta[kv.key] = javaobj.loads(kv.serialized_value)
                elif kv.key == "extra.links":
                    # The links are stored as a JSON document inside a
                    # Java-serialized string.
                    extra_links = json.loads(
                        javaobj.loads(kv.serialized_value))

            spout_entry = {
                "config": convert_pb_kvs(config_kvs,
                                         include_non_primitives=False),
                "type": meta["spout.type"],
                "source": meta["spout.source"],
                "version": meta["spout.version"],
                "outputs": [],
                "extra_links": extra_links,
            }

            # Render component extra links with general params.
            state = topology.execution_state
            url_params = {
                "cluster": state.cluster,
                "environ": state.environ,
                "role": state.role,
                "jobname": topology.name,
                "submission_user": state.submission_user,
            }
            for link in extra_links:
                formatter = link[EXTRA_LINK_FORMATTER_KEY]
                link[EXTRA_LINK_URL_KEY] = self.config.get_formatted_url(
                    formatter, url_params)

            spout_entry["outputs"].extend(
                {"stream_name": out.stream.id} for out in spout.outputs)

            plan["spouts"][spout_name] = spout_entry

        # Add bolts.
        for bolt in topology.bolts():
            bolt_name = bolt.comp.name
            bolt_entry = {
                "config": convert_pb_kvs(bolt.comp.config.kvs,
                                         include_non_primitives=False),
                "outputs": [],
                "inputs": [],
            }
            bolt_entry["outputs"].extend(
                {"stream_name": out.stream.id} for out in bolt.outputs)
            for inp in bolt.inputs:
                source = inp.stream
                bolt_entry["inputs"].append({
                    "stream_name": source.id,
                    "component_name": source.component_name,
                    "grouping": topology_pb2.Grouping.Name(inp.gtype),
                })

            plan["bolts"][bolt_name] = bolt_entry

        return plan
Exemplo n.º 6
0
  def extract_physical_plan(self, topology):
    """
    Build the physical plan representation that is returned from Tracker.

    The result always has the keys "instances", "instance_groups",
    "stmgrs", "spouts", "bolts" and "config". When the topology has no
    physical plan, all of them are empty. Otherwise:
      - "config" holds the topology config; serialized values are shown
        as a JSON dump plus the hex-escaped raw bytes,
      - "spouts"/"bolts" map component names to their instance ids,
      - "stmgrs" maps stream manager ids to their details,
      - "instances" maps instance ids to their details,
      - "instance_groups" maps "container_<component_index + 1>" to the
        instance ids in that group, in order of first appearance.
    """
    result = {
        "instances": {},
        "instance_groups": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }

    pplan = topology.physical_plan
    if not pplan:
      return result

    spouts = topology.spouts()
    bolts = topology.bolts()
    stmgrs = list(pplan.stmgrs)
    instances = list(pplan.instances)

    # Configs
    topology_config = pplan.topology.topology_config
    if topology_config:
      config = result["config"]
      for kv in topology_config.kvs:
        if kv.value:
          config[kv.key] = kv.value
          continue
        if not kv.serialized_value:
          continue

        # currently assumes that serialized_value is Java serialization
        # when multi-language support is added later, ConfigValueType should be checked
        try:
          pobj = javaobj.loads(kv.serialized_value)
          config[kv.key] = {
              'value': json.dumps(pobj,
                                  default=lambda custom_field: custom_field.__dict__,
                                  sort_keys=True,
                                  indent=2),
              # Hexadecimal byte array for Serialized objects
              'raw': utils.hex_escape(kv.serialized_value),
          }
        except Exception:
          Log.exception("Failed to parse data as java object")
          config[kv.key] = {
              # The value should be a valid json object
              'value': '{}',
              'raw': utils.hex_escape(kv.serialized_value),
          }

    # Every known component starts with an empty instance list.
    spout_instances = result["spouts"]
    for spout in spouts:
      spout_instances[spout.comp.name] = []
    bolt_instances = result["bolts"]
    for bolt in bolts:
      bolt_instances[bolt.comp.name] = []

    stmgr_infos = result["stmgrs"]
    for stmgr in stmgrs:
      host = stmgr.host_name
      cwd = stmgr.cwd
      # shell_port is only reported when the field is actually set.
      shell_port = None
      if stmgr.HasField("shell_port"):
        shell_port = stmgr.shell_port
      stmgr_infos[stmgr.id] = {
          "id": stmgr.id,
          "host": host,
          "port": stmgr.data_port,
          "shell_port": shell_port,
          "cwd": cwd,
          "pid": stmgr.pid,
          "joburl": utils.make_shell_job_url(host, shell_port, cwd),
          "logfiles": utils.make_shell_logfiles_url(host, shell_port, cwd),
          "instance_ids": [],
      }

    groups = collections.OrderedDict()
    for instance in instances:
      instance_id = instance.instance_id
      stmgr_id = instance.stmgr_id
      component = instance.info.component_name
      owner = stmgr_infos[stmgr_id]

      group_name = "container_%d" % (int(instance.info.component_index) + 1)
      groups.setdefault(group_name, []).append(instance_id)

      result["instances"][instance_id] = {
          "id": instance_id,
          "name": component,
          "stmgrId": stmgr_id,
          "logfile": utils.make_shell_logfiles_url(
              owner["host"], owner["shell_port"], owner["cwd"], instance.instance_id),
      }
      owner["instance_ids"].append(instance_id)

      # Anything that is not a known spout is looked up among the bolts.
      if component in spout_instances:
        spout_instances[component].append(instance_id)
      else:
        bolt_instances[component].append(instance_id)

    result["instance_groups"] = groups

    return result
Exemplo n.º 7
0
  def extract_physical_plan(self, topology):
    """
    Build the physical plan representation that is returned from Tracker.

    The result always has the keys "instances", "stmgrs", "spouts",
    "bolts" and "config"; all are empty when the topology has no
    physical plan. Otherwise "config" holds the topology config,
    "spouts" and "bolts" map component names to their instance ids,
    "stmgrs" maps stream manager ids to their details and "instances"
    maps instance ids to their details.
    """
    plan_view = {
        "instances": {},
        "stmgrs": {},
        "spouts": {},
        "bolts": {},
        "config": {},
    }

    pplan = topology.physical_plan
    if not pplan:
      return plan_view

    spouts = topology.spouts()
    bolts = topology.bolts()
    stmgrs = list(pplan.stmgrs)
    instances = list(pplan.instances)

    # Configs
    topology_config = pplan.topology.topology_config
    if topology_config:
      for kv in topology_config.kvs:
        if kv.value:
          plan_view["config"][kv.key] = kv.value
        elif kv.serialized_value:
          # currently assumes that serialized_value is Java serialization
          # when multi-language support is added later, ConfigValueType should be checked
          try:
            pobj = javaobj.loads(kv.serialized_value)
            rendered = {
                'value': json.dumps(pobj,
                                    default=lambda custom_field: custom_field.__dict__,
                                    sort_keys=True,
                                    indent=2),
                # Hexadecimal byte array for Serialized objects
                'raw': utils.hex_escape(kv.serialized_value),
            }
          except Exception:
            # Objects that cannot be decoded are shown with a placeholder.
            rendered = {
                'value': 'A Java Object',
                'raw': utils.hex_escape(kv.serialized_value),
            }
          plan_view["config"][kv.key] = rendered

    # Every known component starts with an empty instance list.
    for spout in spouts:
      plan_view["spouts"][spout.comp.name] = []
    for bolt in bolts:
      plan_view["bolts"][bolt.comp.name] = []

    for stmgr in stmgrs:
      host, cwd = stmgr.host_name, stmgr.cwd
      # shell_port is only reported when the field is actually set.
      shell_port = None
      if stmgr.HasField("shell_port"):
        shell_port = stmgr.shell_port
      plan_view["stmgrs"][stmgr.id] = {
          "id": stmgr.id,
          "host": host,
          "port": stmgr.data_port,
          "shell_port": shell_port,
          "cwd": cwd,
          "pid": stmgr.pid,
          "joburl": utils.make_shell_job_url(host, shell_port, cwd),
          "logfiles": utils.make_shell_logfiles_url(host, shell_port, cwd),
          "instance_ids": [],
      }

    for instance in instances:
      instance_id = instance.instance_id
      stmgr_id = instance.stmgr_id
      component = instance.info.component_name
      owner = plan_view["stmgrs"][stmgr_id]

      plan_view["instances"][instance_id] = {
          "id": instance_id,
          "name": component,
          "stmgrId": stmgr_id,
          "logfile": utils.make_shell_logfiles_url(
              owner["host"], owner["shell_port"], owner["cwd"], instance.instance_id),
      }
      owner["instance_ids"].append(instance_id)

      # Anything that is not a known spout is looked up among the bolts.
      if component in plan_view["spouts"]:
        target = plan_view["spouts"]
      else:
        target = plan_view["bolts"]
      target[component].append(instance_id)

    return plan_view