def create_burst_df():
    """Collect per-file burst results into a date-by-event DataFrame.

    Every path matching ``burst_result/*/*`` is treated as one dump file.
    The event name of a file is its base name with the first
    ``_``-separated token removed.  Each file is loaded with
    ``sb.open_dump`` and unpacked as ``(event, day, data)``; ``data`` is
    stored in the cell addressed by ``day`` (row) and ``event`` (column).

    Files that fail to load or unpack are skipped (the original author's
    note says this is how "no bursts detected" shows up).  Files whose
    stored event name disagrees with the name derived from the path are
    reported on stdout and skipped.

    Returns:
        pandas.DataFrame: indexed by every day from 2012-01-01 through
        2013-03-31, with one column per event name found on disk.  Cells
        that were never assigned stay NaN.
    """
    files = glob.glob('burst_result/*/*')

    def event_name_of(path):
        # Base name of the path minus its first "_"-separated token.
        return "_".join(path.split('/')[-1].split('_')[1:])

    cols = sorted({event_name_of(fi) for fi in files})

    burst_df = pd.DataFrame(
        index=pd.date_range('20120101', '20130331'), columns=cols)
    for fi in files:
        event_name = event_name_of(fi)

        print(fi)

        try:  # some bursts are detected
            ev, day, data = sb.open_dump(fi)
        except Exception:  # no bursts are detected
            # Deliberately best-effort, but no longer a bare ``except`` so
            # that KeyboardInterrupt / SystemExit still stop the run.
            continue

        if event_name != ev:
            print('event name error')
            print(event_name, ev, day, data)
            continue

        burst_df.loc[pd.to_datetime(day), ev] = data

    return burst_df
def burst2get_data(burst_file):
    """Parse a text burst listing into ``{date_string: [(start, end), ...]}``.

    The file is read line by line:

    * a line whose first character is ``(`` starts a new date.  The text
      between that ``(`` and the last two characters of the line is split
      on ``,``; each piece is stripped, zero-padded to width 2 and the
      pieces are concatenated into the key.  The key's list is reset to
      empty.
    * a line whose first non-blank character is ``[`` contributes one
      ``(start, end)`` pair of floats, taken from the second and third
      comma-separated fields, to the most recent date.
    * blank lines and all other lines are ignored.

    Args:
        burst_file: path of the text file to parse.

    Returns:
        The mapping returned by ``sb.open_dump('burst_file')``, updated
        with the parsed entries.
    """
    # NOTE(review): the container is loaded from the literal dump name
    # 'burst_file', not from the ``burst_file`` argument.  The previous
    # (commented-out) version started from
    # ``collections.defaultdict(lambda: 0)``.  Kept as-is -- confirm intent.
    get_data = sb.open_dump('burst_file')

    # ``with`` guarantees the handle is closed; the original left it open.
    with open(burst_file, "r") as fh:
        for line in fh:
            stripped = line.strip()
            if not stripped:
                # Blank lines used to raise IndexError on ``stripped[0]``.
                continue
            if line[0] == '(':
                get_date = "".join(
                    [a.strip().zfill(2) for a in line[1:-2].split(",")])
                get_data[get_date] = []
            elif stripped[0] == "[":
                fields = stripped[1:-2].split(",")
                st = fields[1].strip()
                en = fields[2].strip()
                get_data[get_date].append((float(st), float(en)))

    return get_data
示例#3
0
def cnt_logs(DUMP_NAME, DATE):
    """Return the number of entries in the dump ``dumps/<DATE>/<DUMP_NAME>``."""
    dump_path = '/'.join(['dumps', str(DATE), DUMP_NAME])
    records = sb.open_dump(dump_path)
    return len(records)
示例#4
0
    # Open the causality database and fetch both endpoints of every row in
    # the `event` table.
    dbname = 's4causality.db'
    conn = sqlite3.connect(dbname)
    cur = conn.cursor()
    cur.execute('''select srcID,srcHost,dstID,dstHost from event''')
    edge = cur.fetchall()
    # Name each endpoint "<id>_<host>" and sort the two names so that the
    # direction of the edge no longer matters.
    edge = [
        sorted((str(e[0]) + "_" + e[1], str(e[2]) + "_" + e[3])) for e in edge
    ]
    # Join each pair into a single string so duplicates can be dropped with
    # set(), then split it back into a set of endpoint names.
    # NOTE(review): this round trip assumes no name contains "." -- confirm.
    edge = [e[0] + "." + e[1] for e in edge]
    edge = list(set(edge))
    edge = [set(e.split(".")) for e in edge]

    print(len(edge))

    # Event pairs from the 'EvPair' column of the co_prob_df dump, as sets.
    co_burst = sb.open_dump('co_prob_df')
    co_burst = list(co_burst['EvPair'].values)
    co_burst = [set(x) for x in co_burst]

    # Columns of the burst_df dump that hold at least one non-NaN value.
    burst = sb.open_dump('burst_df')
    burst_ev = [x for x in burst.columns if len(burst[x].dropna()) != 0]

    # For every edge that is not listed in co_burst, record the pair once for
    # each endpoint found in burst_ev, with that endpoint placed first.
    # NOTE(review): list(ep) of a set has no guaranteed order, and an edge
    # whose two endpoints are identical yields a one-element set, so ep[1]
    # would raise IndexError -- confirm such edges cannot occur.
    burst_noburst = []
    for ep in edge:
        if ep not in co_burst:
            ep = list(ep)
            if ep[0] in burst_ev:
                burst_noburst.append(ep)
            if ep[1] in burst_ev:
                burst_noburst.append(ep[::-1])
示例#5
0
        argv[0], argv[1], argv[2], argv[3])
    cur.execute(query)
    r = cur.fetchall()
    result = []
    for i in r:
        result.append("".join(i[0].split("-")))
    return result


if __name__ == "__main__":

    # Database handle; `cur` is not used again in the visible lines of this
    # block (presumably read by helpers such as get_eday -- verify).
    dbname = 's4causality.db'
    conn = sqlite3.connect(dbname)
    cur = conn.cursor()

    # Table of event pairs loaded from the 'rp_edge_coburst' dump.
    edge_burst = sb.open_dump('rp_edge_coburst')

    print(len(edge_burst))

    # burst_ev: columns of burst_df holding at least one non-NaN value
    # (not referenced again in the visible part of this block).
    burst = sb.open_dump('burst_df')
    burst_ev = [x for x in burst.columns if len(burst[x].dropna()) != 0]

    result = []
    for evp in edge_burst['EvPair']:
        # Index labels at which each event of the pair has a non-NaN entry,
        # rendered as text: keep what precedes 'T' and remove the dashes.
        # Assumes the index values print as ISO timestamps, giving YYYYMMDD
        # -- TODO confirm.
        bday1 = burst[evp[0]].dropna().index.values
        bday1 = [str(x).split('T')[0].replace("-", "") for x in bday1]
        bday2 = burst[evp[1]].dropna().index.values
        bday2 = [str(x).split('T')[0].replace("-", "") for x in bday2]
        # Labels present for both events of the pair.
        bday = list(set(bday1) & set(bday2))
        # get_eday is defined outside this excerpt; its result is
        # intersected with bday below.
        eday = get_eday(evp)
        if len(set(bday) & set(eday)) != 0:
示例#6
0
        elif line.strip()[0] == "[":
            st = line.strip()[1:-2].split(",")[1].strip()
            en = line.strip()[1:-2].split(",")[2].strip()
            get_data[get_date].append((float(st),float(en)))

    return get_data


# For every dump of the event named on the command line (one per directory
# under dumps/), record the date of its first entry in `x` and its number of
# entries in `y`, printing both as we go.
event = sys.argv[1]
files = glob.glob('dumps/*/{}'.format(event))

x, y = [], []
for fi in files:
    data = sb.open_dump(fi)
    first_day = data[0].date()
    n_entries = len(data)
    print(first_day, ":", n_entries)
    x.append(first_day)
    y.append(n_entries)


# DUMP_NAME = sys.argv[1]
# if len(sys.argv) > 2:
#     PLOT_BURST = int(sys.argv[2])
# else:
#     PLOT_BURST = 0
#
# with open(DUMP_NAME,"rb") as f:
#     obj = pickle.load(f, encoding="bytes")
#
# tmp = set( [datetime.datetime(row.year,row.month,row.day) for row in obj ] )
示例#7
0
    # Numeric id = the text before the first '_' in ev_name.
    temp_id = int(ev_name.split('_')[0])
    # Pick the sub-directory named after the 500-wide id range and build
    # "<pf><range>/<ev_name>.dump".  `pf` is defined outside this excerpt
    # (presumably the dump directory prefix -- verify).
    # NOTE(review): every id >= 1500, including ids of 2000 and above,
    # falls into '1500-1999/'; ids below 0 fall into '0000-0499/'.
    if temp_id < 500:
        return pf + '0000-0499/' + ev_name + '.dump'
    elif temp_id < 1000:
        return pf + '0500-0999/' + ev_name + '.dump'
    elif temp_id < 1500:
        return pf + '1000-1499/' + ev_name + '.dump'
    else:
        return pf + '1500-1999/' + ev_name + '.dump'


if __name__ == "__main__":
    # Two modes, selected by the number of command-line arguments:
    #   <dump> <day>   print linear_rms for one event dump and one day
    #   <burst_df>     walk every column of the burst table and print the
    #                  days whose linear_rms is not above 0.1
    if len(sys.argv) == 3:
        dump = sys.argv[1]
        ev_day = sys.argv[2]
        event = sb.open_dump(dump)
        print(linear_rms(event, ev_day))

    else:
        burst_df = sb.open_dump(sys.argv[1])
        # DataFrame.iteritems() was removed in pandas 2.0; items() yields the
        # same (column name, Series) pairs and exists in older versions too.
        for _, column in burst_df.items():
            tmp = column.dropna()
            if len(tmp) == 0:
                # Column has no non-NaN entry: nothing to check.
                continue
            print(tmp.name)
            dump_name = get_dump_path(tmp.name)
            event = sb.open_dump(dump_name)
            for ev_day in tmp.index:
                rms = linear_rms(event, ev_day.strftime('%Y%m%d'))
                # Written as "not >" on purpose: unlike "<= 0.1" this is
                # also true when rms is NaN.
                if not rms > 0.1:
                    print(ev_day, '\t', rms)
def get_log(DUMP_NAME, DATE):
    """Load and return the dump stored at ``dumps/<DATE>/<DUMP_NAME>``."""
    dump_path = '/'.join(['dumps', str(DATE), DUMP_NAME])
    return sb.open_dump(dump_path)
Files under prefix are laid out like
prefix/0000-0499/hoge.dump

'''

# Merge the per-file dumps of each day into one pickle per (day, host),
# written to dumps_host/<day>/<day>_<host>.
days = [path.rsplit('/', 1)[-1] for path in glob.glob('dumps/*')]

for day in days:
    # A host is whatever follows the last '_' of each path under the day.
    hosts = {
        path.rsplit('_', 1)[-1]
        for path in glob.glob('dumps/{0}/*'.format(day))
    }
    for host in hosts:
        files = glob.glob('dumps/{0}/*_{1}'.format(day, host))

        # Concatenate the contents of every dump belonging to this host.
        host_data = []
        for fi in files:
            host_data += sb.open_dump(fi)

        out_path = 'dumps_host/{0}/{0}_{1}'.format(day, host)
        with open(out_path, 'wb') as f:
            pickle.dump(host_data, f)

# # files = glob.glob('dump_files/0000-0499/*_tokyo-dc-rm.dump') # wildcards are allowed
# files = glob.glob('{0}/*-*/*'.format(PREFIX)) # wildcards are allowed
#
# host_list = []
# for fi in files:
#     host_list.append(fi.split('/')[-1].split('.')[0].split('_')[1])
#
# print(set(host_list),len(set(host_list)))
# #
# # with open("host_list.txt","w") as f:
示例#10
0
# -*- coding: utf-8 -*-
from scipy import arange, hamming, sin, pi
from scipy.fftpack import fft, ifft, fftfreq
import matplotlib.pyplot as plt
import search_burst as sb
import numpy as np
import pandas as pd
import sys
import datetime

# Usage: <script> <event dump> <day as YYYYMMDD>
# Builds `x`, a length-L vector with 10.0 at every second of the chosen day
# on which the event has an entry and 0.0 elsewhere.
event = sb.open_dump(sys.argv[1])
day = sys.argv[2]
ev_year = int(day[:4])
ev_month = int(day[4:6])
ev_day = int(day[6:8])

ev_date = datetime.date(ev_year, ev_month, ev_day)
# Times of day of the entries that fall on ev_date, then the same values as
# seconds since midnight.
plot_data = [row.time() for row in event if row.date() == ev_date]
ev_data = [row.hour * 3600 + row.minute * 60 + row.second for row in plot_data]

fs = 1  # Sampling rate
L = 2**16  # Signal length
# NOTE(review): L (65536) is smaller than the 86400 seconds of a day, so
# entries later than second 65535 never appear in x -- confirm intended.

# Membership is tested L times; a set makes each test O(1) instead of a scan
# over the whole ev_data list.  The resulting x is unchanged.
ev_seconds = set(ev_data)
x = [10. if i in ev_seconds else 0. for i in range(L)]

# test data
# x = [10. if i%3600 == 0 else  0. for i in range(L)]

# # Create a 440 Hz sine wave.
# sine_440 = sin(2. * pi * arange(L) * 440. / fs)
# # Create a 600 Hz sine wave.
示例#11
0
import sqlite3
import numpy as np
import pandas as pd
import search_burst as sb
import pickle


def search_pair_query(id1, host1, id2, host2):
    """Build the SQL that lists the dates recorded for an event pair.

    The query selects ``date`` from table ``date`` for every ``pairID`` in
    table ``event`` whose endpoints are (id1, host1) and (id2, host2), in
    either direction.

    Host names are embedded as single-quoted SQL string literals with any
    embedded ``'`` doubled.  The previous double-quoted form is resolved by
    SQLite as an identifier first, so a host that happened to equal a column
    name (e.g. ``srcHost``) silently matched every row, and a host containing
    ``"`` broke the statement.

    Args:
        id1, id2: event ids, inserted verbatim (unquoted) into the SQL, so
            they must already be numeric.
        host1, host2: host names.

    Returns:
        str: the complete SQL statement, terminated by ``;``.
    """
    def sql_text(value):
        # Standard SQL string literal: wrap in ' and double any inner '.
        return "'" + str(value).replace("'", "''") + "'"

    template = (
        'select date from date where pairID in('
        'select pairID from event where '
        '(srcID={0} and srcHost={1} and dstID={2} and dstHost={3}) or '
        '(srcID={2} and srcHost={3} and dstID={0} and dstHost={1}));'
    )
    return template.format(id1, sql_text(host1), id2, sql_text(host2))


if __name__ == "__main__":
    # Keep the event pairs of co_prob_df for which the database returns at
    # least one date row.
    co_prob_df = sb.open_dump('co_prob_df')

    dbname = 's4causality.db'
    conn = sqlite3.connect(dbname)
    cur = conn.cursor()

    edge_coburst_pair = []
    for row in co_prob_df['EvPair'].values:
        # Each name is unpacked as "<id>_<host>".  Splitting on the first
        # '_' only keeps the unpacking valid when the host contains '_'.
        id1, host1 = row[0].split('_', 1)
        id2, host2 = row[1].split('_', 1)
        q = search_pair_query(id1, host1, id2, host2)
        cur.execute(q)
        q_result = cur.fetchall()
        if q_result:
            edge_coburst_pair.append(row)
示例#12
0
def cnt_logs(DUMP_NAME, DATE):
    """Return the number of entries in the dump at ``DUMP_NAME``.

    ``DATE`` is accepted but not used by this variant.
    """
    return len(sb.open_dump(DUMP_NAME))
# coding: UTF-8
'''
Plot co-bursts and edges
'''

import collections
import sys
import numpy as np
import matplotlib.pyplot as plt
import pybursts
import datetime
import matplotlib.dates as mdates
import pickle
import search_burst as sb

# Two tables loaded from dumps: co_prob_df and co_edge_df.
co_prob_df = sb.open_dump('co_prob_df')
co_edge_df = sb.open_dump('rp_edge_coburst')

# From each table take column 'x' and the 'y_jaccard' / 'y_simpson' columns
# multiplied by 10**5.
xj, yj = co_prob_df['x'], co_prob_df['y_jaccard'] * 100000
xs, ys = co_prob_df['x'], co_prob_df['y_simpson'] * 100000
xej, yej = co_edge_df['x'], co_edge_df['y_jaccard'] * 100000
xes, yes = co_edge_df['x'], co_edge_df['y_simpson'] * 100000

# One False per row of co_prob_df.
df_bool = [False for _ in range(co_prob_df.shape[0])]
for i in [
        x for x in co_prob_df['EvPair']
        if (x[0][:3] == '10_' or x[1][:3] == '11_') or (