def create_machine_datadir(datadir_path, machinedir_path, machine_name):
    """Copy one machine's timeseries files into a machine-specific data dir.

    Builds the destination directory layout (DATA_DIR, TS_DIR, TS_JSON_DIR)
    under *machinedir_path* if missing, then copies the machine's ".data"
    files from the source TS_DIR and TS_JSON_DIR into the matching
    destination directories.

    Args:
        datadir_path: source data directory root (passed to config.paths).
        machinedir_path: destination data directory root.
        machine_name: machine whose timeseries files are copied.
    """
    pathobj = config.paths(datadir=datadir_path)
    new_pathobj = config.paths(datadir=machinedir_path)
    # Hoisted: the original called find_machine_timeseries twice.
    ts_names = find_machine_timeseries(machine_name)
    # Files follow the "<machine>-<series>.data" naming convention.
    paths = [os.path.join(pathobj.TS_DIR, machine_name + "-" + name + ".data")
             for name in ts_names]
    json_paths = [os.path.join(pathobj.TS_JSON_DIR,
                               machine_name + "-" + name + ".data")
                  for name in ts_names]
    # Create the destination tree parent-first (os.mkdir is non-recursive).
    for dirpath in (new_pathobj.DATA_DIR, new_pathobj.TS_DIR,
                    new_pathobj.TS_JSON_DIR):
        if not os.path.exists(dirpath):
            os.mkdir(dirpath)
    for json_path, path in zip(json_paths, paths):
        shutil.copy(path, new_pathobj.TS_DIR)
        # Bug fix: json_paths were computed (and TS_JSON_DIR created) but the
        # JSON companion files were never actually copied.
        shutil.copy(json_path, new_pathobj.TS_JSON_DIR)
def find_machine_timeseries(machine_name):
    """Return the list of timeseries names recorded for *machine_name*.

    Reads the "important.json" mapping under DATA_BASE_DIR; raises KeyError
    if the machine is not present in that mapping.
    """
    pathobj = config.paths()
    # with-block guarantees the file handle is closed even if json.load raises
    # (the original open/close pair leaked the handle on error).
    with open(os.path.join(pathobj.DATA_BASE_DIR, "important.json"), 'r') as imp_file:
        names = json.load(imp_file)
    return names[machine_name]
def create_machine_datadir(datadir_path, machinedir_path, machine_name):
    """Copy one machine's timeseries files into a machine-specific data dir.

    Builds the destination directory layout (DATA_DIR, TS_DIR, TS_JSON_DIR)
    under *machinedir_path* if missing, then copies the machine's ".data"
    files from the source TS_DIR and TS_JSON_DIR into the matching
    destination directories.

    Args:
        datadir_path: source data directory root (passed to config.paths).
        machinedir_path: destination data directory root.
        machine_name: machine whose timeseries files are copied.
    """
    pathobj = config.paths(datadir=datadir_path)
    new_pathobj = config.paths(datadir=machinedir_path)
    # Hoisted: the original called find_machine_timeseries twice.
    ts_names = find_machine_timeseries(machine_name)
    # Files follow the "<machine>-<series>.data" naming convention.
    paths = [os.path.join(pathobj.TS_DIR, machine_name + "-" + name + ".data")
             for name in ts_names]
    json_paths = [os.path.join(pathobj.TS_JSON_DIR,
                               machine_name + "-" + name + ".data")
                  for name in ts_names]
    # Create the destination tree parent-first (os.mkdir is non-recursive).
    for dirpath in (new_pathobj.DATA_DIR, new_pathobj.TS_DIR,
                    new_pathobj.TS_JSON_DIR):
        if not os.path.exists(dirpath):
            os.mkdir(dirpath)
    for json_path, path in zip(json_paths, paths):
        shutil.copy(path, new_pathobj.TS_DIR)
        # Bug fix: json_paths were computed (and TS_JSON_DIR created) but the
        # JSON companion files were never actually copied.
        shutil.copy(json_path, new_pathobj.TS_JSON_DIR)
def find_corr_matrix(dataset): paths = config.paths(dataset) anomaly_dict = dict() for index, path in enumerate([os.path.join(paths.TS_DIR, f) for f in os.listdir(paths.TS_DIR)]): anomaly_dict[path] = gateway.get_anomalies(path, "combined_hmm", None, percent=0.5) # print(path) paths = list() cor_mat = list() for i, path in enumerate(anomaly_dict): print i, anomaly_dict[path] paths.append(path) cor_mat.append(list()) weights = anomalies_to_onesided_expweights(anomaly_dict[path]) for j, otherpath in enumerate(anomaly_dict): cor_mat[i].append(anomaly_weight_overlap(anomaly_dict[otherpath], weights)) return paths, cor_mat
def find_corr_matrix(dataset): paths = config.paths(dataset) anomaly_dict = dict() for index, path in enumerate( [os.path.join(paths.TS_DIR, f) for f in os.listdir(paths.TS_DIR)]): anomaly_dict[path] = gateway.get_anomalies(path, "combined_hmm", None, percent=0.5) #print(path) paths = list() cor_mat = list() for i, path in enumerate(anomaly_dict): print i, anomaly_dict[path] paths.append(path) cor_mat.append(list()) weights = anomalies_to_onesided_expweights(anomaly_dict[path]) for j, otherpath in enumerate(anomaly_dict): cor_mat[i].append( anomaly_weight_overlap(anomaly_dict[otherpath], weights)) return paths, cor_mat
def find_machine_timeseries(machine_name):
    """Return the list of timeseries names recorded for *machine_name*.

    Reads the "important.json" mapping under DATA_BASE_DIR; raises KeyError
    if the machine is not present in that mapping.
    """
    pathobj = config.paths()
    # with-block guarantees the file handle is closed even if json.load raises
    # (the original open/close pair leaked the handle on error).
    with open(os.path.join(pathobj.DATA_BASE_DIR, "important.json"), 'r') as imp_file:
        names = json.load(imp_file)
    return names[machine_name]