Exemplo n.º 1
0
def samples_to_df(h, need_line):
    """Convert a parsed perf event list to a pandas table.

    Only events whose type is "SAMPLE" produce rows; every event is first
    offered to the mmap tracker.  Each row holds the resolved ``filename``,
    ``symbol``, ``line`` and ``soffset`` of the sample IP, the
    ``kernel``/``guest``/``hv`` flags looked up from the sample's
    ``cpumode``, optional ``callchain``/``branch`` ids, and every raw sample
    field that is not listed in ``ignored``.  Columns whose ``used`` count
    is still zero at the end are dropped.

    Args:
        h: Handle passed to ``perfdata.get_events`` to obtain the events.
        need_line: Forwarded unchanged to ``elf.resolve_ip``,
            ``resolve_chain`` and ``resolve_branch`` (presumably selects
            source-line resolution -- confirm against those helpers).

    Returns:
        A ``pd.DataFrame`` indexed by the integer sample time.  The frame
        carries two extra attributes, ``callchain_aux`` and ``branch_aux``,
        referencing the ``Aux`` objects that issued the ``callchain`` and
        ``branch`` ids.
    """
    ev = perfdata.get_events(h)
    index = []
    data = defaultdict(list)
    callchains = Aux()
    branches = Aux()

    used = Counter()
    mm = mmap.MmapTracker()

    # Loop invariant: only closes over data and used, so build it once.
    def add(k, i):
        do_add(data, used, k, i)

    for n in range(len(ev)):
        # The tracker sees every event (with its position), not just samples.
        mm.lookahead_mmap(ev, n)

        j = ev[n]
        if j.type != "SAMPLE":
            continue

        mm.update_sample(j)

        filename, _mmap_base, foffset = mm.resolve(j.pid, j.ip)
        if filename == "[kernel.kallsyms]_text":
            filename = None
        add('filename', filename)
        sym, soffset, line = elf.resolve_ip(filename, foffset, j.ip, need_line)
        add('symbol', sym)
        add('line', line)
        add('soffset', soffset)
        if 'callchain' in j and j.callchain:
            # Default arguments bind this iteration's sample, so the callback
            # stays correct even if Aux.add were to invoke it later.
            chain_id = callchains.add(
                j.callchain.caller,
                lambda j=j: resolve_chain(j.callchain, j, mm, need_line))
            add('callchain', chain_id)
        if 'branch_stack' in j and j.branch_stack:
            branch = j.branch_stack.branch
            branch_id = branches.add(
                map(lambda x: (x['from'], x.to), branch),
                lambda j=j, branch=branch: resolve_branch(branch, j, mm,
                                                          need_line))
            add('branch', branch_id)
        kernel, guest, hv = cpumodes[j['cpumode']]
        add('kernel', kernel)
        add('guest', guest)
        add('hv', hv)
        # Copy the raw sample fields; a field counts as used once it has
        # held a truthy value in at least one sample.
        for name in j:
            if name not in ignored:
                if j[name]:
                    used[name] += 1
                data[name].append(j[name])
        index.append(int(j["time"]))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # key view raises RuntimeError on Python 3.
    for column in list(data):
        if used[column] == 0:
            del data[column]
    df = pd.DataFrame(data, index=index, dtype=np.uint64)
    # Without any sample no column exists, so the casts would raise KeyError.
    if index:
        for field in bool_fields:
            df[field] = df[field].astype('bool')
    df.branch_aux = branches
    df.callchain_aux = callchains
    return df
Exemplo n.º 2
0
def samples_to_df(h):
    """Convert a parsed perf event list to a pandas table of samples.

    MMAP events with ``pid == -1`` and ``tid == 0`` are entered into the
    address maps right away; all other COMM/MMAP events are queued and
    replayed in ``time2`` order while walking the samples.  Each "SAMPLE"
    event yields one row holding the resolved ``filename``/``foffset``,
    the ``symbol``/``soffset``/``line`` (only looked up when the file name
    starts with "/"), and every raw sample field not listed in ``ignored``.
    Columns whose ``used`` count is still zero at the end are dropped.

    Args:
        h: Handle passed to ``perfdata.get_events`` to obtain the events.

    Returns:
        A ``pd.DataFrame`` indexed by ``pd.Timestamp`` of the sample time.
    """
    ev = perfdata.get_events(h)
    index = []
    data = defaultdict(list)

    maps = defaultdict(list)
    # NOTE: pnames is filled in below but not read in this function.
    pnames = defaultdict(str)
    used = Counter()

    # Loop invariant: only closes over data and used, so build it once.
    def add(k, i):
        do_add(data, used, k, i)

    # comm do not necessarily appear in order
    # first build queue of comm in order
    updates = []
    for j in ev:
        # no time stamp: assume it's synthesized and kernel
        if j.type == 'MMAP' and j.pid == -1 and j.tid == 0:
            bisect.insort(maps[j.pid], (j.addr, j.len, j.filename))
        elif j.type in ('COMM', 'MMAP'):
            updates.append(j)
    # Stable sort on the time stamp alone: events with equal time2 keep
    # their file order and are never compared with each other (comparing
    # the event objects may raise TypeError on Python 3).
    updates.sort(key=lambda u: u.time2)
    pending = 0  # index of the next update that has not been applied yet

    for j in ev:
        if j.type != "SAMPLE":
            continue

        # process pending updates
        while pending < len(updates) and j.time >= updates[pending].time2:
            u = updates[pending]
            pending += 1
            if u.type == 'MMAP':
                bisect.insort(maps[u.pid], (u.addr, u.len, u.filename))
            elif u.type == 'COMM':
                # A COMM update discards the mappings recorded for that pid.
                maps[u.pid] = []
                pnames[u.pid] = u.comm

        filename, foffset = resolve(maps, j.pid, j.ip)
        add('filename', filename)
        add('foffset', foffset)
        sym, soffset, line = None, None, None
        if filename and filename.startswith("/"):
            sym, soffset, line = elf.resolve_addr(filename, j.ip)
        add('symbol', sym)
        add('soffset', soffset)
        add('line', line)
        # NOTE: callchain entries are not symbolized in this variant.
        # Copy the raw sample fields; a field counts as used once it has
        # held a truthy value in at least one sample.
        for name in j:
            if name not in ignored:
                if j[name]:
                    used[name] += 1
                data[name].append(j[name])
        index.append(pd.Timestamp(j["time"]))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # key view raises RuntimeError on Python 3.
    for column in list(data):
        if used[column] == 0:
            del data[column]
    return pd.DataFrame(data, index=index, dtype=np.uint64)
Exemplo n.º 3
0
def samples_to_df(h):
    """Convert a parsed perf event list to a pandas table of samples.

    MMAP events with ``pid == -1`` and ``tid == 0`` are entered into the
    address maps right away; all other COMM/MMAP events are queued and
    replayed in ``time2`` order while walking the samples.  Each "SAMPLE"
    event yields one row holding the resolved ``filename`` and ``offset``
    plus every raw sample field not listed in ``ignored``.  Raw columns
    that never held a truthy value are dropped.

    Args:
        h: Handle passed to ``perfdata.get_events`` to obtain the events.

    Returns:
        A ``pd.DataFrame`` indexed by ``pd.Timestamp`` of the sample time.
    """
    ev = perfdata.get_events(h)
    index = []
    data = defaultdict(list)

    maps = defaultdict(list)
    # NOTE: pnames is filled in below but not read in this function.
    pnames = defaultdict(str)
    used = Counter()

    # comm do not necessarily appear in order
    # first build queue of comm in order
    updates = []
    for j in ev:
        # no time stamp: assume it's synthesized and kernel
        if j.type == 'MMAP' and j.pid == -1 and j.tid == 0:
            bisect.insort(maps[j.pid], (j.addr, j.len, j.filename))
        elif j.type in ('COMM', 'MMAP'):
            updates.append(j)
    # Stable sort on the time stamp alone: events with equal time2 keep
    # their file order and are never compared with each other (comparing
    # the event objects may raise TypeError on Python 3).
    updates.sort(key=lambda u: u.time2)
    pending = 0  # index of the next update that has not been applied yet

    for j in ev:
        if j.type != "SAMPLE":
            continue

        # process pending updates
        while pending < len(updates) and j.time >= updates[pending].time2:
            u = updates[pending]
            pending += 1
            if u.type == 'MMAP':
                bisect.insort(maps[u.pid], (u.addr, u.len, u.filename))
            elif u.type == 'COMM':
                # A COMM update discards the mappings recorded for that pid.
                maps[u.pid] = []
                pnames[u.pid] = u.comm

        filename, offset = resolve(maps, j.pid, j.ip)
        data['filename'].append(filename)
        data['offset'].append(offset)
        # The resolved columns are always kept, whatever their values.
        used['offset'] += 1
        used['filename'] += 1
        for name in j:
            if name not in ignored:
                if j[name]:
                    used[name] += 1
                data[name].append(j[name])
        # XXX assumes time exists
        index.append(pd.Timestamp(j["time"]))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # key view raises RuntimeError on Python 3.
    for column in list(data):
        if used[column] == 0:
            del data[column]
    return pd.DataFrame(data, index=index, dtype=np.uint64)
Exemplo n.º 4
0
def samples_to_df(h, need_line):
    """Convert a parsed perf event list to a pandas table.

    Only events whose type is "SAMPLE" produce rows; every event is first
    offered to the mmap tracker.  Each row holds the resolved ``filename``,
    ``symbol``, ``line`` and ``soffset`` of the sample IP, the
    ``kernel``/``guest``/``hv`` flags looked up from the sample's
    ``cpumode``, optional ``callchain``/``branch`` ids, and every raw sample
    field that is not listed in ``ignored``.  Raw columns that never held a
    truthy value are dropped.

    Args:
        h: Handle passed to ``perfdata.get_events`` to obtain the events.
        need_line: Forwarded unchanged to ``elf.resolve_ip``,
            ``resolve_chain`` and ``resolve_branch`` (presumably selects
            source-line resolution -- confirm against those helpers).

    Returns:
        A ``pd.DataFrame`` indexed by the integer sample time.  The frame
        carries two extra attributes, ``callchain_aux`` and ``branch_aux``,
        referencing the ``Aux`` objects that issued the ``callchain`` and
        ``branch`` ids.
    """
    ev = perfdata.get_events(h)
    index = []
    data = defaultdict(list)
    callchains = Aux()
    branches = Aux()

    used = Counter()
    mm = mmap.MmapTracker()

    # Loop invariant: only closes over data and used, so define it once.
    # Columns written through add() always count as used.
    def add(k, i):
        data[k].append(i)
        used[k] += 1

    numsample = 0
    for n in range(len(ev)):
        # The tracker sees every event (with its position), not just samples.
        mm.lookahead_mmap(ev, n)

        j = ev[n]
        if j.type != "SAMPLE":
            continue

        numsample += 1

        mm.update_sample(j)

        filename, _mmap_base, foffset = mm.resolve(j.pid, j.ip)
        if filename == "[kernel.kallsyms]_text":
            filename = None
        add('filename', filename)
        sym, soffset, line = elf.resolve_ip(filename, foffset, j.ip, need_line)
        add('symbol', sym)
        add('line', line)
        add('soffset', soffset)
        if 'callchain' in j and j.callchain:
            # Default arguments bind this iteration's sample, so the callback
            # stays correct even if Aux.add were to invoke it later.
            chain_id = callchains.add(
                j.callchain.caller,
                lambda j=j: resolve_chain(j.callchain, j, mm, need_line))
            add('callchain', chain_id)
        if 'branch_stack' in j and j.branch_stack:
            branch = j.branch_stack.branch
            branch_id = branches.add(
                map(lambda x: (x['from'], x.to), branch),
                lambda j=j, branch=branch: resolve_branch(branch, j, mm,
                                                          need_line))
            add('branch', branch_id)
        kernel, guest, hv = cpumodes[j['cpumode']]
        add('kernel', kernel)
        add('guest', guest)
        add('hv', hv)
        # Copy the raw sample fields; a field counts as used once it has
        # held a truthy value in at least one sample.
        for name in j:
            if name not in ignored:
                if j[name]:
                    used[name] += 1
                data[name].append(j[name])
        index.append(int(j["time"]))

    # Iterate over a snapshot: deleting from a dict while iterating its live
    # key view raises RuntimeError on Python 3.
    for column in list(data):
        if used[column] == 0:
            del data[column]
    df = pd.DataFrame(data, index=index, dtype=np.uint64)
    # Without any sample no column exists, so the casts would raise KeyError.
    if numsample > 0:
        for field in bool_fields:
            df[field] = df[field].astype('bool')
    df.branch_aux = branches
    df.callchain_aux = callchains
    return df