Пример #1
0
def granger_df_producer_worker(lst):
    """Run pairwise Granger tests for one file and pickle the result table.

    All inputs arrive packed in one sequence ``lst``:

    * ``lst[0]`` file_name: name looked up in the two directory listings.
    * ``lst[1]`` file_path: pickle file loaded as the stationarity results.
    * ``lst[2]`` save_path: file the resulting DataFrame is pickled to.
    * ``lst[3]`` stationary_path: directory that must contain ``file_name``.
    * ``lst[4]`` dct: mapping from variable name to a pandas object
      (``.values`` is read and the objects are passed to ``pd.concat``).
    * ``lst[5]`` combinations: iterable of ``(X, Y)`` name pairs to test.
    * ``lst[6]`` liste: labels used as index and columns of the result.
    * ``lst[7]`` causality_path: directory holding finished results.

    The file is skipped (nothing is written) when ``file_name`` is already
    listed in ``causality_path`` or is missing from ``stationary_path``.
    Otherwise a ``liste`` x ``liste`` DataFrame is filled per pair with the
    output of ``Granger.test`` or with ``'N/A'`` when either variable does
    not have both stationarity flags equal to 1, and is pickled to
    ``save_path``. Pairs that are skipped or fail stay NaN.

    Returns:
        None.
    """
    file_name = lst[0]
    file_path = lst[1]
    save_path = lst[2]
    stationary_path = lst[3]
    dct = lst[4]
    combinations = lst[5]
    liste = lst[6]
    causality_path = lst[7]

    start = time.time()

    if file_name in os.listdir(causality_path):
        print('Skipping file' + file_name + ' since it is already calculated')
        return

    if file_name not in os.listdir(stationary_path):
        print('Skipping file' + file_name +
              ' since it is not in the stationary dir')
        return

    # NOTE(review): pickle can execute arbitrary code while loading; only
    # feed this worker files produced by trusted code.
    # The context manager closes the handle even if unpickling fails.
    with open(file_path, 'rb') as stationary_handle:
        stationary_file = pickle.load(stationary_handle)
    print(file_name)

    df = pd.DataFrame(index=liste, columns=liste)
    text_dtype = np.dtype('<U10')

    for X, Y in combinations:
        if X not in dct or Y not in dct:
            continue

        # Variables stored as 10-character unicode strings are not tested.
        if (dct[X].values.dtype == text_dtype
                or dct[Y].values.dtype == text_dtype):
            continue

        # For now only pairs whose two flags are both 1 are tested; later
        # the data may instead have to be made stationary first.
        x_flags = stationary_file[X]
        y_flags = stationary_file[Y]
        x_stationary = x_flags[0] == 1 and x_flags[1] == 1
        y_stationary = y_flags[0] == 1 and y_flags[1] == 1

        if not (x_stationary and y_stationary):
            df.at[X, Y] = 'N/A'
            continue

        data = pd.concat([dct[X], dct[Y]], axis=1)
        data = Database.fillna(data)

        # Best effort per pair: a failing test (or a result that cannot be
        # stored in the cell) is reported and the cell stays NaN.
        # ``Exception`` keeps Ctrl-C and interpreter shutdown working, which
        # a bare ``except`` would swallow.
        try:
            output = Granger.test(data, 60, 0.05, True, True)
            df.at[X, Y] = output
        except Exception:
            print('Granger test failed at combination: ' + X + '/' + Y)

    with open(save_path, 'wb') as result_handle:
        pickle.dump(df, result_handle)

    stop = time.time() - start
    print(file_name + 'finished after ' + str(stop) + ' s')
Пример #2
0
def statistic_iterate_all_worker(liste,
                                 dct,
                                 visual=False,
                                 path=cd.paths.stationary_results):
    """Run the ADF and KPSS tests for one file and pickle the flag table.

    Args:
        liste: names of the variables to test; they become the columns of
            the result.
        dct: pair ``(name, data)``. ``name`` is the output file name inside
            ``path``; ``data`` maps variable names to their values and must
            also hold a ``'<prefix>_line_number'`` entry for every tested
            variable, where ``<prefix>`` is the part of the variable name
            before the first underscore.
        visual: forwarded to ``plotter``, ``adf_test`` and ``kpss_test``.
        path: directory the result is written to. Nothing is computed when
            it already contains ``name``.

    The pickled result is a DataFrame with rows ``'adf'`` and ``'kpss'``
    and one column per entry of ``liste``. A cell is 1 or 0 depending on
    the truthiness of the corresponding test's return value, and NaN for
    variables that are missing from the data or have object dtype.

    Returns:
        None.
    """
    name = dct[0]
    columns = dct[1]

    file_path = os.path.join(path, name)

    if name in os.listdir(path):
        print(name + ' already calculated. skipping ...')
        return

    print(name)
    file_adf_counter = np.full(len(liste), np.nan)
    file_kpss_counter = np.full(len(liste), np.nan)
    for j, subcomponent in enumerate(liste):
        if subcomponent not in columns:
            continue

        line_number_key = subcomponent.split('_')[0] + '_line_number'
        data = pd.DataFrame(data=columns[subcomponent],
                            index=columns[line_number_key],
                            columns=[subcomponent])
        data = Database.fillna(data, filler=False)

        # Object-typed columns are not tested; their flags stay NaN.
        if data[subcomponent].dtype == 'O':
            continue

        plotter(data, subcomponent, visual)
        # Silence every warning raised while the two tests run.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            adf = adf_test(data, subcomponent, visual)
            kpss = kpss_test(data, subcomponent, visual)

        file_adf_counter[j] = 1 if adf else 0
        file_kpss_counter[j] = 1 if kpss else 0

    out = pd.DataFrame(data=np.vstack((file_adf_counter, file_kpss_counter)),
                       columns=liste,
                       index=['adf', 'kpss'])
    # Close the output file deterministically instead of leaking the handle.
    with open(file_path, 'wb') as result_handle:
        pickle.dump(out, result_handle)
Пример #3
0
from CrashDiag.Preproc.Database import Database, iterDataframes, iterDict
import CrashDiag.Detection.Granger as Granger
import time
import pickle
import pandas as pd
import CrashDiag as cd
import warnings
import os

# Commented-out alternative kept for reference: db = Database('SampleLog')
db = Database()
stationary_path = cd.paths.stationary_results

# Variables to test pairwise. A commented-out alternative took the first 99
# index entries of the pickled 'most_subcomponents.p' instead:
#   most_subcompos = list(
#       pickle.load(open('most_subcomponents.p', 'rb')).index[:99])
most_subcompos = ['ATT_DesRoll', 'ATT_Roll']

start = time.time()
n = 0
cd.data.store(n, 'test')

# Collect every unordered pair of distinct variables exactly once, in
# first-seen order. The set mirrors the list so that the "already have this
# pair in either orientation" check is O(1) instead of a list scan.
combinations = []
seen_pairs = set()
for X in most_subcompos:
    for Y in most_subcompos:
        if X == Y or (X, Y) in seen_pairs or (Y, X) in seen_pairs:
            continue
        seen_pairs.add((X, Y))
        combinations.append((X, Y))
output = []

for file_name, dct in iterDict(db, most_subcompos, querytype='or',
                               linenr=True):
Пример #4
0
from CrashDiag.Preproc.Database import Database
from CrashDiag.Detection.Correlation import correlation
import pickle


db = Database()

# NOTE(review): pickle can execute arbitrary code while loading; only load
# files produced by trusted code.
# The context manager closes the file once it has been read.
with open('../Misc/most_subcomponents.p', 'rb') as subcomps_file:
    most_subcomps = pickle.load(subcomps_file)
most_subcomps_list = list(most_subcomps.index)

# Names that are excluded from the correlation run.
remove_list = ['PM_I2CErr', 'PM_INSErr', 'ERR_ECode', 'MSG_Message', 'PM_INAVErr', 'DU32_Value', 'DU32_Id',
               'CMD_Cld', 'CMD_Copt', 'CMD_Prm1']

# Keep the original order, drop the excluded names and every name that
# contains 'Time', then truncate to the first 100 survivors.
top100trunc = [
    subcomp for subcomp in most_subcomps_list
    if subcomp not in remove_list and 'Time' not in subcomp
]
top100trunc = top100trunc[:100]

corr = correlation(db, top100trunc, n_files=10)

# Persist the result; the file is closed as soon as the dump finishes.
with open('corr.p', 'wb') as corr_file:
    pickle.dump(corr, corr_file)
Пример #5
0
from CrashDiag.Preproc.Database import Database, iterDataframes
from CrashDiag.Detection.Causality import statistic_iterate_all
import pickle
import time

if __name__ == "__main__":
    # Variables handed to the stationarity run.
    #
    # A disabled alternative built this list from the index of the pickled
    # '../Misc/most_subcomponents.p' instead: it skipped 'PM_I2CErr',
    # 'PM_INSErr', 'ERR_ECode', 'MSG_Message', 'PM_INAVErr', 'DU32_Value',
    # 'DU32_Id', 'CMD_Cld', 'CMD_Copt', 'CMD_Prm1' and every name containing
    # 'Time', and kept the first 100 remaining entries.
    most_subcomps_list = [
        'RCIN_C1',
        'RCIN_C2',
        'RCIN_C3',
        'RCIN_C4',
    ]

    # Load the database.
    db = Database('SampleLog')

    # Timestamp taken immediately before the run starts.
    start = time.time()
    statistic_iterate_all(db, most_subcomps_list)
Пример #6
0
from CrashDiag.Preproc.Database import Database, iterDataframes
from CrashDiag.Detection.Causality import statistic_iterate_log_file
import matplotlib.pyplot as plt
import pandas as pd

# Variables to analyse in the log file.
liste = [
    'CURR_CurrTot',
    'ATT_DesRoll',
]

# Load the database.
db = Database(dbname='SampleLog')

# Run the per-file statistics with visual output enabled.
ergebnis = statistic_iterate_log_file(
    db,
    'sample.log',
    liste,
    visual=True,
)