Пример #1
0
    def pre_process(self, profile):
        """Add derived counters to a query profile, in place.

        * On the "Summary" node, each known event of the "Query Compilation"
          and "Query Timeline" event sequences becomes a counter holding the
          time elapsed since the previous event of that sequence. Known
          events that never occurred get a counter with value 0.
        * On each regular node present in the parsed exec summary, adds
          ``Hosts`` and ``Broadcast`` counters and, for ``*_SCAN_NODE``
          nodes with a detail string, a ``Table`` info string and a
          ``MissingStats`` counter.
        * On each node with an id, adds ``LocalTime`` and ``ChildTime``
          counters.

        Args:
            profile: profile tree; must provide ``find_by_name`` and
                ``foreach_lambda``.

        Raises:
            KeyError: if a node with an id lacks a counter that is read
                unconditionally (``TotalTime``, ``InactiveTotalTime`` on
                exchange nodes, ``ProbeTime``/``BuildTime`` on hash joins
                without ``LocalTime``).
        """
        # NOTE(review): unit=5 / unit=0 are passed through unchanged;
        # presumably a time unit and a plain-count unit -- confirm against
        # the counter unit enum.
        summary = profile.find_by_name("Summary")

        # Read the summary only after confirming it exists, so a profile
        # without a "Summary" node no longer fails with AttributeError.
        exec_summary_text = (summary.val.info_strings.get('ExecSummary')
                             if summary else None)
        exec_summary_json = (utils.parse_exec_summary(exec_summary_text)
                             if exec_summary_text else {})

        # Bound unconditionally because add_host below always reads it.
        missing_stats = {}

        # Event label (used as a case-insensitive regex when there is no
        # exact match) -> name of the counter to emit, per event sequence.
        stats_mapping = {
            'Query Compilation': {
                'Metadata load finished': 'MetadataLoadTime',
                'Analysis finished': 'AnalysisTime',
                'Single node plan created': 'SinglePlanTime',
                'Runtime filters computed': 'RuntimeFilterTime',
                'Distributed plan created': 'DistributedPlanTime',
                'Lineage info computed': 'LineageTime'
            },
            'Query Timeline': {
                'Planning finished':
                'PlanningTime',
                'Completed admission':
                'AdmittedTime',
                'Rows available':
                'QueryTime',
                'Unregister query':
                'EndTime',
                '((fragment instances)|(remote fragments)|(execution backends).*) started':
                'RemoteFragmentsStarted'
            }
        }

        def value_or_zero(counters, name):
            # Value of an optional counter, 0 when it is absent.
            return counters.get(name, models.TCounter(value=0)).value

        # Setup Event Sequence
        if summary:
            for s in summary.val.event_sequences:
                sequence = stats_mapping.get(s.name)
                if not sequence:
                    continue
                previous = 0
                for event_name, timestamp in zip(s.labels, s.timestamps):
                    # Every event advances the clock, even unmapped ones.
                    event_duration = timestamp - previous
                    previous = timestamp

                    # Matched entries are popped so that what remains in
                    # the mapping is exactly the set of events not seen.
                    counter_name = sequence.pop(event_name, None)
                    if counter_name is None:
                        # Snapshot the keys: we pop while searching.
                        for pattern in list(sequence):
                            if re.search(pattern, event_name, re.IGNORECASE):
                                counter_name = sequence.pop(pattern)
                                break
                    if counter_name is not None:
                        summary.val.counters.append(
                            models.TCounter(name=counter_name,
                                            value=event_duration,
                                            unit=5))

            # Events that never showed up still get a zero-valued counter.
            for sequence_name in ('Query Compilation', 'Query Timeline'):
                for counter_name in stats_mapping[sequence_name].values():
                    summary.val.counters.append(
                        models.TCounter(name=counter_name, value=0, unit=5))

            for key in ('Tables Missing Stats',
                        'Tables With Corrupt Table Stats'):
                tables = summary.val.info_strings.get(key)
                if tables:
                    for table in tables.split(','):
                        # Stripped so the name can match the whitespace-free
                        # token taken from the exec summary detail below.
                        missing_stats[table.strip()] = 1

        def add_host(node, exec_summary_json=exec_summary_json):
            is_plan_node = node.is_plan_node()
            node_id = node.id()
            is_regular = bool(node_id) and node.is_regular()
            nid = int(node_id) if is_regular else -1

            # Setup Hosts & Broadcast
            if is_regular and nid in exec_summary_json:
                exec_summary_node = exec_summary_json[nid]
                node.val.counters.append(
                    models.TCounter(name='Hosts',
                                    value=exec_summary_node.get('hosts', ''),
                                    unit=0))
                broadcast = 1 if exec_summary_node.get('broadcast') else 0
                node.val.counters.append(
                    models.TCounter(name='Broadcast', value=broadcast, unit=0))

                if exec_summary_node.get('detail') and re.search(
                        r'\w*_SCAN_NODE', node.name(), re.IGNORECASE):
                    details = exec_summary_node['detail'].split()
                    # A whitespace-only detail yields no tokens; skip it.
                    if details:
                        node.val.info_strings['Table'] = details[0]
                        node.val.counters.append(
                            models.TCounter(
                                name='MissingStats',
                                value=missing_stats.get(details[0], 0),
                                unit=0))

            # Setup LocalTime & ChildTime
            if not node_id:
                return

            child_time = 0
            for c in node.children:
                if c.is_plan_node():
                    child_time += c.counter_map()['TotalTime'].value

            counter_map = node.counter_map()
            # Keep the counter object, not its value: the scan-node
            # branches below rewrite TotalTime in place.
            total = counter_map['TotalTime']
            node_name = node.val.name

            # Default: whatever is not spent in plan-node children.
            local_time = total.value - child_time

            # Make sure to subtract the wait time for the exchange node
            if is_plan_node and 'EXCHANGE_NODE' in node_name:
                async_time = value_or_zero(counter_map, 'AsyncTotalTime')
                inactive_time = counter_map['InactiveTotalTime'].value
                if inactive_time == 0:
                    # Fall back to the Dequeue child's DataWaitTime.
                    dequeue = node.find_by_name('Dequeue')
                    inactive_time = value_or_zero(
                        dequeue.counter_map(),
                        'DataWaitTime') if dequeue else 0
                local_time = total.value - inactive_time - async_time
                child_time = total.value - local_time
            if 'KrpcDataStreamSender' in node_name and node.fragment_instance:
                local_time = value_or_zero(counter_map, 'SerializeBatchTime')
                child_time = total.value - local_time
            if 'HBASE_SCAN_NODE' in node_name:
                local_time = total.value - value_or_zero(
                    counter_map, 'TotalRawHBaseReadTime(*)')
                child_time = total.value - local_time
            if 'KUDU_SCAN_NODE' in node_name:
                # TotalTime is raised by the client time in place.
                child_time = value_or_zero(counter_map, 'KuduClientTime')
                local_time = total.value
                total.value = child_time + local_time
            if 'HDFS_SCAN_NODE' in node_name:
                # TotalTime is raised by the raw read time in place.
                child_time = value_or_zero(counter_map,
                                           'TotalRawHdfsReadTime(*)')
                local_time = total.value
                total.value = local_time + child_time

            # For Hash Join, prefer the node's own "LocalTime" counter when
            # it exists; otherwise use ProbeTime + BuildTime.
            if is_plan_node and 'HASH_JOIN_NODE' in node_name:
                if "LocalTime" in counter_map:
                    local_time = counter_map["LocalTime"].value
                else:
                    local_time = counter_map["ProbeTime"].value +\
                        counter_map["BuildTime"].value

            # Add two virtual metrics for local_time and child_time
            node.val.counters.append(
                models.TCounter(name='LocalTime', value=local_time,
                                unit=5))
            node.val.counters.append(
                models.TCounter(name='ChildTime', value=child_time,
                                unit=5))

        profile.foreach_lambda(add_host)
Пример #2
0
    def pre_process(self, profile):
        """Add derived counters to a query profile, in place.

        * On the "Summary" node, each known event of the "Query Compilation"
          and "Query Timeline" event sequences becomes a counter holding the
          time elapsed since the previous event of that sequence. Known
          events that never occurred get a counter with value 0.
        * On each regular node present in the parsed exec summary, adds
          ``Hosts`` and ``Broadcast`` counters.
        * On each node with an id, adds ``LocalTime`` and ``ChildTime``
          counters.

        Args:
            profile: profile tree; must provide ``find_by_name`` and
                ``foreach_lambda``.

        Raises:
            KeyError: if a node with an id lacks a counter that is read
                unconditionally (``TotalTime``, ``InactiveTotalTime`` on
                exchange nodes, ``ProbeTime``/``BuildTime`` on hash joins
                without ``LocalTime``), or if an exec summary entry lacks
                ``hosts`` or ``broadcast``.
        """
        # NOTE(review): unit=5 / unit=0 are passed through unchanged;
        # presumably a time unit and a plain-count unit -- confirm against
        # the counter unit enum.
        summary = profile.find_by_name("Summary")

        # Read the summary only after confirming it exists, and tolerate a
        # missing 'ExecSummary' entry, instead of failing with
        # AttributeError / KeyError before the `if summary:` check below.
        exec_summary_text = (summary.val.info_strings.get('ExecSummary')
                             if summary else None)
        exec_summary_json = (utils.parse_exec_summary(exec_summary_text)
                             if exec_summary_text else {})

        # Event label (used as a case-insensitive regex when there is no
        # exact match) -> name of the counter to emit, per event sequence.
        stats_mapping = {
            'Query Compilation': {
                'Metadata load finished': 'MetadataLoadTime',
                'Analysis finished': 'AnalysisTime',
                'Single node plan created': 'SinglePlanTime',
                'Runtime filters computed': 'RuntimeFilterTime',
                'Distributed plan created': 'DistributedPlanTime',
                'Lineage info computed': 'LineageTime'
            },
            'Query Timeline': {
                'Planning finished':
                'PlanningTime',
                'Completed admission':
                'AdmittedTime',
                'Rows available':
                'QueryTime',
                'Unregister query':
                'EndTime',
                '((fragment instances)|(remote fragments)|(execution backends).*) started':
                'RemoteFragmentsStarted'
            }
        }
        # Setup Event Sequence
        if summary:
            for s in summary.val.event_sequences:
                sequence = stats_mapping.get(s.name)
                if not sequence:
                    continue
                previous = 0
                for event_name, timestamp in zip(s.labels, s.timestamps):
                    # Every event advances the clock, even unmapped ones.
                    event_duration = timestamp - previous
                    previous = timestamp

                    # Matched entries are popped so that what remains in
                    # the mapping is exactly the set of events not seen.
                    counter_name = sequence.pop(event_name, None)
                    if counter_name is None:
                        # Snapshot the keys: we pop while searching.
                        for pattern in list(sequence):
                            if re.search(pattern, event_name, re.IGNORECASE):
                                counter_name = sequence.pop(pattern)
                                break
                    if counter_name is not None:
                        summary.val.counters.append(
                            models.TCounter(name=counter_name,
                                            value=event_duration,
                                            unit=5))

            # Events that never showed up still get a zero-valued counter.
            for sequence_name in ('Query Compilation', 'Query Timeline'):
                for counter_name in stats_mapping[sequence_name].values():
                    summary.val.counters.append(
                        models.TCounter(name=counter_name, value=0, unit=5))

        def add_host(node, exec_summary_json=exec_summary_json):
            is_plan_node = node.is_plan_node()
            node_id = node.id()
            # Setup Hosts & Broadcast
            if node_id and node.is_regular() and int(
                    node_id) in exec_summary_json:
                exec_summary_node = exec_summary_json[int(node_id)]
                node.val.counters.append(
                    models.TCounter(name='Hosts',
                                    value=exec_summary_node["hosts"],
                                    unit=0))
                broadcast = 1 if exec_summary_node["broadcast"] else 0
                node.val.counters.append(
                    models.TCounter(name='Broadcast', value=broadcast, unit=0))

            # Setup LocalTime & ChildTime
            if not node_id:
                return

            child_time = 0
            for c in node.children:
                if c.is_plan_node():
                    child_time += c.counter_map()['TotalTime'].value

            counter_map = node.counter_map()
            total_time = counter_map['TotalTime'].value
            node_name = node.val.name

            # Default: whatever is not spent in plan-node children.
            local_time = total_time - child_time

            # Make sure to subtract the wait time for the exchange node
            if is_plan_node and 'EXCHANGE_NODE' in node_name:
                async_time = counter_map.get(
                    'AsyncTotalTime', models.TCounter(value=0)).value
                local_time = (total_time -
                              counter_map['InactiveTotalTime'].value -
                              async_time)

            # For Hash Join, prefer the node's own "LocalTime" counter when
            # it exists; otherwise use ProbeTime + BuildTime.
            if is_plan_node and 'HASH_JOIN_NODE' in node_name:
                if "LocalTime" in counter_map:
                    local_time = counter_map["LocalTime"].value
                else:
                    local_time = counter_map["ProbeTime"].value +\
                        counter_map["BuildTime"].value

            # Add two virtual metrics for local_time and child_time
            node.val.counters.append(
                models.TCounter(name='LocalTime', value=local_time,
                                unit=5))
            node.val.counters.append(
                models.TCounter(name='ChildTime', value=child_time,
                                unit=5))

        profile.foreach_lambda(add_host)