# print(grubbs.max_test_outliers(data, alpha=0.05))
# result = grubbs.max_test_outliers(data, alpha=0.05)
for window_size in window_sizes:
    for I in IS:
        for L in LS:
            print("start L {}, I {}, window_size {}".format(L, I, window_size))
            files_list = ut_lc.getListLight(height=L, duration=I)
            rows = []
            for index, file_name in enumerate(files_list):
                print("file : {}".format(file_name))
                rawInstances, transList = ut_service.getDataLC_test(
                    file_name, L=L, I=I)
                timestamp = [*range(0, len(rawInstances), 1)]
                FdataMDF_dy = ut_gen.genListDyBin(instances=rawInstances,
                                                  timestamp=timestamp,
                                                  windowSize=window_size,
                                                  initialBin=3)
                # FdataMDF_Fix = ut_gen.genFixBin(binSize=binSize, instances=rawInstances, isExtract=False)
                gb_core = gb_test()
                for instance in FdataMDF_dy["window"]:
                    gb_core.add_element(instance.get_representation())
                for alpha in alphas:
                    # countFile = 0
                    # foundList = gb_core.get_max_outliers(alpha=alpha)
                    # if foundList:
                    #     countFile = 1
                    # rows.append([file_name, L, I, alpha, countFile, foundList])
                    indexBin = gb_core.get_max_index(alpha=alpha)
                    detect_tran = 0
                    if indexBin:
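# Illustrative sketch, not the project's gb_test implementation: a one-sided
# Grubbs "max" test over a list of bin representations, assuming
# gb_core.get_max_index(alpha=...) is meant to flag the largest bin mean as an
# outlier at significance alpha. Only numpy/scipy standard calls are used;
# the function name and return convention below are assumptions.
import numpy as np
from scipy import stats


def grubbs_max_index(values, alpha=0.05):
    """Return the index of the maximum value if it is a Grubbs outlier, else None."""
    x = np.asarray(values, dtype=float)
    n = len(x)
    if n < 3:
        return None
    mean, std = x.mean(), x.std(ddof=1)
    if std == 0:
        return None
    idx = int(np.argmax(x))
    g = (x[idx] - mean) / std
    # One-sided critical value from the t distribution (NIST formulation).
    t_crit = stats.t.ppf(1 - alpha / n, n - 2)
    g_crit = ((n - 1) / np.sqrt(n)) * np.sqrt(t_crit ** 2 / (n - 2 + t_crit ** 2))
    return idx if g > g_crit else None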
windows_size = [10, 20]

if __name__ == '__main__':
    list_files = ut_db.getAnswerMDF_toDB()
    for index, row in enumerate(list_files):
        file_name = row['file_name']
        FdataMDF, FtimestampMDF = ut_service.getMDFData(file_name)
        UTC = ut_astro.convertList_MJDtoUTC(FtimestampMDF)
        raw = {"timestamp": FtimestampMDF, 'instances': FdataMDF, 'UTC': UTC}
        rows_data = []
        for ini_bin in inis_bin:
            for window_size in windows_size:
                FdataMDF_dy = ut_gen.genListDyBin(instances=FdataMDF,
                                                  timestamp=FtimestampMDF,
                                                  windowSize=window_size,
                                                  isOnline=False,
                                                  initialBin=ini_bin)
                rows_data.append({
                    "timestamp": FtimestampMDF,
                    'instances': FdataMDF_dy['sketchInstances'],
                    'UTC': UTC,
                    'title_name': 'win {}, ini {}'.format(window_size, ini_bin)
                })
        ut_bokeh.export_HTML_mutilples(fileName="{}.html".format(file_name),
                                       raw=raw,
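# Hedged sketch of what ut_bokeh.export_HTML_mutilples is assumed to do: write
# one HTML file with the raw light curve on top and one panel per
# (window_size, initial_bin) sketch below it. The key names ('timestamp',
# 'instances', 'title_name') mirror the dicts built above; that
# 'sketchInstances' has one value per timestamp is an assumption. The Bokeh
# calls themselves are standard public API.
from bokeh.io import output_file, save
from bokeh.layouts import column
from bokeh.plotting import figure


def export_html_multiples_sketch(fileName, raw, rows_data):
    panels = []
    top = figure(title="raw", width=900, height=250)
    top.line(raw["timestamp"], raw["instances"])
    panels.append(top)
    for row in rows_data:
        # Share the x range with the raw panel so all panels zoom together.
        p = figure(title=row["title_name"], width=900, height=250,
                   x_range=top.x_range)
        p.line(row["timestamp"], row["instances"])
        panels.append(p)
    output_file(fileName)
    save(column(*panels))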
    plt.ylim((min_var, max_var))
    plt.legend(
        [a.collections[0], b1],
        [
            "learned frontier",
            "training observations"
            # ,
            # "new regular observations",
            # "new abnormal observations",
        ],
        loc="upper left",
        prop=matplotlib.font_manager.FontProperties(size=11),
    )
    # plt.xlabel(
    #     "errors novel regular: %d/40 ; errors novel abnormal: %d/40"
    #     % (n_error_test, n_error_outliers)
    # )
    plt.show()


if __name__ == '__main__':
    fileName = '41242369943869440_0'
    height = '3'
    duration = '200'
    lightData = ut_light.getDataFromFile(fileName=fileName,
                                         height=height,
                                         duration=duration)
    dyResult = ut_data.genListDyBin(instances=lightData["instances"],
                                    timestamp=lightData["timestamp"],
                                    windowSize=20)
    computeLocalOutlierFactor(dyResult)
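# Minimal sketch of the LOF step, assuming computeLocalOutlierFactor scores the
# dynamic-bin output with scikit-learn's LocalOutlierFactor (the legend labels
# above come from the scikit-learn outlier-detection examples). Reading
# dyResult["sketchInstances"] is an assumption; swap in whichever field of
# genListDyBin's output holds the bin values.
import numpy as np
from sklearn.neighbors import LocalOutlierFactor


def local_outlier_factor_sketch(dyResult, n_neighbors=20):
    X = np.asarray(dyResult["sketchInstances"], dtype=float).reshape(-1, 1)
    lof = LocalOutlierFactor(n_neighbors=n_neighbors)
    labels = lof.fit_predict(X)           # -1 marks outliers, 1 marks inliers
    scores = lof.negative_outlier_factor_  # lower means more anomalous
    return labels, scores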
ini_bin = 5
# window_size = 10
# sql_file = 'getFileForSim.sql'
sql_file = 'getFileForSim2.sql'
distance = 2000

if __name__ == '__main__':
    list_files = ut_db.getFileListFromSQL(sqlFileName=sql_file)
    for index, main_file in enumerate(list_files):
        FdataMain, FtimestampMain = ut_service.getMDFData(main_file)
        windowMain = int(len(FdataMain) / 50)
        mainDy = ut_gen.genListDyBin(instances=FdataMain,
                                     windowSize=windowMain,
                                     timestamp=FtimestampMain,
                                     initialBin=ini_bin)
        near_files = ut_db.getNearStars(fileName=main_file, maxDistance=distance)
        rows_data = []
        for near_file in near_files:
            try:
                FdataMDF, FtimestampMDF = ut_service.getMDFData(
                    near_file["file_target"])
                windowTarget = int(len(FdataMDF) / 50)
                targetDy = ut_gen.genListDyBin(instances=FdataMDF,
                                               timestamp=FtimestampMDF,
                                               windowSize=windowTarget,
                                               isOnline=False,
                                               initialBin=ini_bin)
                f_test, p_value = ut_cal.Ftest_cal(
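# Hedged sketch of what ut_cal.Ftest_cal is assumed to compute (its definition
# is not shown in this excerpt): a two-sided variance-ratio F-test between the
# main star's bin values and a nearby star's bin values. The input convention
# is an assumption; only the scipy calls are standard.
import numpy as np
from scipy.stats import f


def f_test_sketch(main_values, target_values):
    main = np.asarray(main_values, dtype=float)
    target = np.asarray(target_values, dtype=float)
    f_stat = main.var(ddof=1) / target.var(ddof=1)
    df1, df2 = len(main) - 1, len(target) - 1
    # Two-sided p-value: twice the smaller tail probability, capped at 1.
    p_value = 2 * min(f.cdf(f_stat, df1, df2), f.sf(f_stat, df1, df2))
    return f_stat, min(p_value, 1.0)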
from numpy import histogram

if __name__ == '__main__':
    windowSize = 40
    upper_bound = 1.87519737
    lower_bound = 0.53327720
    listFile = ut.txt_to_list(csv_name="start_file.csv")
    # for windowSize in listWindow:
    for index, fileName1 in enumerate(listFile):
        print("File Name {}".format(fileName1))
        pattern = fileName1.split("_")[4]
        lightData1 = ut_mdf.getDataFromFile(fileName=fileName1)
        dyResult1 = ut_data.genListDyBin(instances=lightData1["instances"],
                                         timestamp=lightData1["timestamp"],
                                         windowSize=windowSize)
        targetList = ut_mdf.getListMDF(pattern=pattern)
        for fileName2 in targetList:
            try:
                lightData2 = ut_mdf.getDataFromFile(fileName=fileName2)
                isOverlap = ut_mdf.isOverlapTimestamp(lightData1["timestamp"],
                                                      lightData2["timestamp"])
                if isOverlap:
                    dyResult2 = ut_data.genListDyBin(
                        instances=lightData2["instances"],
                        timestamp=lightData2["timestamp"],
                        windowSize=windowSize)
                    # computeLocalOutlierFactor(dyResult1, windowSize=windowSize)
                    F_test = dyResult1["variance"] / dyResult2["variance"]
                    if lower_bound <= F_test <= upper_bound:
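# How hard-coded bounds like the pair above can be reproduced: for a two-sided
# variance-ratio test at significance alpha with (df1, df2) degrees of freedom,
# the acceptance interval is [F_{alpha/2}, F_{1-alpha/2}]. The exact degrees of
# freedom behind 1.87519737 / 0.53327720 are not stated in this excerpt; note
# only that the two values are reciprocals, as expected for equal df.
from scipy.stats import f


def f_test_bounds(df1, df2, alpha=0.05):
    lower = f.ppf(alpha / 2, df1, df2)
    upper = f.ppf(1 - alpha / 2, df1, df2)
    return lower, upper

# Example: with alpha = 0.05 and equal degrees of freedom around 40 (roughly
# one per bin when windowSize = 40), the bounds come out close to the
# 0.533 / 1.875 pair used above.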
isOnline = False
index_list = [*range(0, window_size - 1)]

if __name__ == '__main__':
    for height in heights:
        for duration in durations:
            listFiles = ut_lc.getListLight(height=height, duration=duration)
            folder_name = "I{}_L{}".format(height, duration)
            ut.checkFolderandCreate(folder_name)
            for lc_file in listFiles:
                data = ut_lc.getDataFromFile(fileName=lc_file,
                                             height=height,
                                             duration=duration)
                dy_data = ut_gen.genListDyBin(instances=data['instances'],
                                              timestamp=data['timestamp'],
                                              windowSize=window_size,
                                              initialBin=initial_Bin,
                                              isOnline=False)
                results = []
                prior_index = 0
                for index_bin in index_list:
                    prior_bin = dy_data['window'][index_bin]
                    cur_bin = dy_data['window'][index_bin + 1]
                    prior_n = prior_bin.get_number_instance()
                    cur_n = cur_bin.get_number_instance()
                    priorMean = prior_bin.get_representation()
                    curMean = cur_bin.get_representation()
                    priorSDError = prior_bin.get_SDError()
                    curSDError = cur_bin.get_SDError()
                    K_star = ut_de.detection_SKmethod(
                        curMean=curMean,
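# ut_de.detection_SKmethod is project-specific and not shown in this excerpt.
# As a generic stand-in (explicitly a different, named technique, not the
# project's method): compare two adjacent bins with Welch's t-test from their
# summary statistics. get_SDError() is assumed to return the standard error of
# the bin mean, so it is converted back to a sample standard deviation before
# calling scipy.
import math
from scipy.stats import ttest_ind_from_stats


def adjacent_bin_ttest(priorMean, priorSDError, prior_n,
                       curMean, curSDError, cur_n):
    prior_std = priorSDError * math.sqrt(prior_n)
    cur_std = curSDError * math.sqrt(cur_n)
    stat, p_value = ttest_ind_from_stats(priorMean, prior_std, prior_n,
                                         curMean, cur_std, cur_n,
                                         equal_var=False)
    return stat, p_value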
            filesize = os.path.getsize(pathFile)
            if filesize != 0:
                result_found = 0
                file = open(pathFile, 'rb')
                FdataMDF = [float(item) for item in ut.txt_to_list(pathFile)]
                path_lc_timestamp_path = "{}{}{}.txt".format(
                    dataset_path, lc_timestamp_path, fileName)
                x_axis = [
                    float(item) for item in ut.txt_to_list(path_lc_timestamp_path)
                ]
                # window_size = int((len(FdataMDF) / initialBin) * theshold_compression)
                window_size = 10
                FdataMDF_dy = ut_gen.genListDyBin(instances=FdataMDF,
                                                  timestamp=x_axis,
                                                  windowSize=window_size,
                                                  initialBin=3,
                                                  isOnline=False)
                gb_core = gb_test()
                for instance in FdataMDF_dy["window"]:
                    gb_core.add_element(instance.get_representation())
                foundList = gb_core.get_max_outliers(alpha=alpha)
                if foundList:
                    result_found = 1
                row = [fileName, alpha, window_size, result_found]
                ut.add_to_txt(row,
                              csv_name="dy_MDF window{}.txt".format(window_size),
                              is_append=True)
            print("END : File .... {}".format(fileName))
        except: